//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

     16 // Group template arguments that can be derived from the vector type (EltNum x
     17 // EltVT).  These are things like the register class for the writemask, etc.
     18 // The idea is to pass one of these as the template argument rather than the
     19 // individual arguments.
     20 // The template is also used for scalar types, in this case numelts is 1.
     21 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
     22                       string suffix = ""> {
     23   RegisterClass RC = rc;
     24   ValueType EltVT = eltvt;
     25   int NumElts = numelts;
     26 
     27   // Corresponding mask register class.
     28   RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
     29 
     30   // Corresponding write-mask register class.
     31   RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
     32 
     33   // The mask VT.
     34   ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
     35 
     36   // Suffix used in the instruction mnemonic.
     37   string Suffix = suffix;
     38 
     39   // VTName is a string name for vector VT. For vector types it will be
     40   // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
     41   // It is a little bit complex for scalar types, where NumElts = 1.
     42   // In this case we build v4f32 or v2f64
     43   string VTName = "v" # !if (!eq (NumElts, 1),
     44                         !if (!eq (EltVT.Size, 32), 4,
     45                         !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
     46 
     47   // The vector VT.
     48   ValueType VT = !cast<ValueType>(VTName);
     49 
     50   string EltTypeName = !cast<string>(EltVT);
     51   // Size of the element type in bits, e.g. 32 for v16i32.
     52   string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
     53   int EltSize = EltVT.Size;
     54 
     55   // "i" for integer types and "f" for floating-point types
     56   string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
     57 
     58   // Size of RC in bits, e.g. 512 for VR512.
     59   int Size = VT.Size;
     60 
     61   // The corresponding memory operand, e.g. i512mem for VR512.
     62   X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
     63   X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
     64   // FP scalar memory operand for intrinsics - ssmem/sdmem.
     65   Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
     66                            !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
     67 
     68   // Load patterns
     69   // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
     70   //       due to load promotion during legalization
     71   PatFrag LdFrag = !cast<PatFrag>("load" #
     72                                   !if (!eq (TypeVariantName, "i"),
     73                                        !if (!eq (Size, 128), "v2i64",
     74                                        !if (!eq (Size, 256), "v4i64",
     75                                        !if (!eq (Size, 512), "v8i64",
     76                                             VTName))), VTName));
     77 
     78   PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
     79                                          !if (!eq (TypeVariantName, "i"),
     80                                                !if (!eq (Size, 128), "v2i64",
     81                                                !if (!eq (Size, 256), "v4i64",
     82                                                !if (!eq (Size, 512), "v8i64",
     83                                                    VTName))), VTName));
     84 
     85   PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
     86 
     87   ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
     88                                           !cast<ComplexPattern>("sse_load_f32"),
     89                                     !if (!eq (EltTypeName, "f64"),
     90                                           !cast<ComplexPattern>("sse_load_f64"),
     91                                     ?));
     92 
     93   // The string to specify embedded broadcast in assembly.
     94   string BroadcastStr = "{1to" # NumElts # "}";
     95 
     96   // 8-bit compressed displacement tuple/subvector format.  This is only
     97   // defined for NumElts <= 8.
     98   CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
     99                                !cast<CD8VForm>("CD8VT" # NumElts), ?);
    100 
    101   SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
    102                           !if (!eq (Size, 256), sub_ymm, ?));
    103 
    104   Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
    105                      !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
    106                      SSEPackedInt));
    107 
    108   RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
    109 
    110   // A vector tye of the same width with element type i64. This is used to
    111   // create patterns for logic ops.
    112   ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
    113 
    114   // A vector type of the same width with element type i32.  This is used to
    115   // create the canonical constant zero node ImmAllZerosV.
    116   ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
    117   dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
    118 
    119   string ZSuffix = !if (!eq (Size, 128), "Z128",
    120                    !if (!eq (Size, 256), "Z256", "Z"));
    121 }
    122 
    123 def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
    124 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
    125 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
    126 def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
    127 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
    128 def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
    129 
    130 // "x" in v32i8x_info means RC = VR256X
    131 def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
    132 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
    133 def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
    134 def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
    135 def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
    136 def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
    137 
    138 def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
    139 def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
    140 def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
    141 def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
    142 def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
    143 def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
    144 
    145 // We map scalar types to the smallest (128-bit) vector type
    146 // with the appropriate element type. This allows to use the same masking logic.
    147 def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
    148 def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
    149 def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
    150 def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
    151 
    152 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
    153                            X86VectorVTInfo i128> {
    154   X86VectorVTInfo info512 = i512;
    155   X86VectorVTInfo info256 = i256;
    156   X86VectorVTInfo info128 = i128;
    157 }
    158 
    159 def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
    160                                              v16i8x_info>;
    161 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
    162                                              v8i16x_info>;
    163 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
    164                                              v4i32x_info>;
    165 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
    166                                              v2i64x_info>;
    167 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
    168                                              v4f32x_info>;
    169 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
    170                                              v2f64x_info>;
    171 
    172 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
    173                        ValueType _vt> {
    174   RegisterClass KRC = _krc;
    175   RegisterClass KRCWM = _krcwm;
    176   ValueType KVT = _vt;
    177 }
    178 
    179 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
    180 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
    181 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
    182 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
    183 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
    184 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
    185 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
    186 
    187 // This multiclass generates the masking variants from the non-masking
    188 // variant.  It only provides the assembly pieces for the masking variants.
    189 // It assumes custom ISel patterns for masking which can be provided as
    190 // template arguments.
    191 multiclass AVX512_maskable_custom<bits<8> O, Format F,
    192                                   dag Outs,
    193                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
    194                                   string OpcodeStr,
    195                                   string AttSrcAsm, string IntelSrcAsm,
    196                                   list<dag> Pattern,
    197                                   list<dag> MaskingPattern,
    198                                   list<dag> ZeroMaskingPattern,
    199                                   string MaskingConstraint = "",
    200                                   bit IsCommutable = 0,
    201                                   bit IsKCommutable = 0,
    202                                   bit IsKZCommutable = IsCommutable> {
    203   let isCommutable = IsCommutable in
    204     def NAME: AVX512<O, F, Outs, Ins,
    205                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
    206                                      "$dst, "#IntelSrcAsm#"}",
    207                        Pattern>;
    208 
    209   // Prefer over VMOV*rrk Pat<>
    210   let isCommutable = IsKCommutable in
    211     def NAME#k: AVX512<O, F, Outs, MaskingIns,
    212                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
    213                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
    214                        MaskingPattern>,
    215               EVEX_K {
    216       // In case of the 3src subclass this is overridden with a let.
    217       string Constraints = MaskingConstraint;
    218     }
    219 
    220   // Zero mask does not add any restrictions to commute operands transformation.
    221   // So, it is Ok to use IsCommutable instead of IsKCommutable.
    222   let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    223     def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
    224                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
    225                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
    226                        ZeroMaskingPattern>,
    227               EVEX_KZ;
    228 }
    229 
    230 
    231 // Common base class of AVX512_maskable and AVX512_maskable_3src.
    232 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
    233                                   dag Outs,
    234                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
    235                                   string OpcodeStr,
    236                                   string AttSrcAsm, string IntelSrcAsm,
    237                                   dag RHS, dag MaskingRHS,
    238                                   SDNode Select = vselect,
    239                                   string MaskingConstraint = "",
    240                                   bit IsCommutable = 0,
    241                                   bit IsKCommutable = 0,
    242                                   bit IsKZCommutable = IsCommutable> :
    243   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
    244                          AttSrcAsm, IntelSrcAsm,
    245                          [(set _.RC:$dst, RHS)],
    246                          [(set _.RC:$dst, MaskingRHS)],
    247                          [(set _.RC:$dst,
    248                                (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
    249                          MaskingConstraint, IsCommutable,
    250                          IsKCommutable, IsKZCommutable>;
    251 
    252 // This multiclass generates the unconditional/non-masking, the masking and
    253 // the zero-masking variant of the vector instruction.  In the masking case, the
    254 // perserved vector elements come from a new dummy input operand tied to $dst.
    255 // This version uses a separate dag for non-masking and masking.
    256 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
    257                            dag Outs, dag Ins, string OpcodeStr,
    258                            string AttSrcAsm, string IntelSrcAsm,
    259                            dag RHS, dag MaskRHS,
    260                            bit IsCommutable = 0, bit IsKCommutable = 0,
    261                            SDNode Select = vselect> :
    262    AVX512_maskable_custom<O, F, Outs, Ins,
    263                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
    264                           !con((ins _.KRCWM:$mask), Ins),
    265                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
    266                           [(set _.RC:$dst, RHS)],
    267                           [(set _.RC:$dst,
    268                               (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
    269                           [(set _.RC:$dst,
    270                               (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
    271                           "$src0 = $dst", IsCommutable, IsKCommutable>;
    272 
    273 // This multiclass generates the unconditional/non-masking, the masking and
    274 // the zero-masking variant of the vector instruction.  In the masking case, the
    275 // perserved vector elements come from a new dummy input operand tied to $dst.
    276 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
    277                            dag Outs, dag Ins, string OpcodeStr,
    278                            string AttSrcAsm, string IntelSrcAsm,
    279                            dag RHS,
    280                            bit IsCommutable = 0, bit IsKCommutable = 0,
    281                            bit IsKZCommutable = IsCommutable,
    282                            SDNode Select = vselect> :
    283    AVX512_maskable_common<O, F, _, Outs, Ins,
    284                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
    285                           !con((ins _.KRCWM:$mask), Ins),
    286                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
    287                           (Select _.KRCWM:$mask, RHS, _.RC:$src0),
    288                           Select, "$src0 = $dst", IsCommutable, IsKCommutable,
    289                           IsKZCommutable>;
    290 
    291 // This multiclass generates the unconditional/non-masking, the masking and
    292 // the zero-masking variant of the scalar instruction.
    293 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
    294                            dag Outs, dag Ins, string OpcodeStr,
    295                            string AttSrcAsm, string IntelSrcAsm,
    296                            dag RHS,
    297                            bit IsCommutable = 0> :
    298    AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
    299                    RHS, IsCommutable, 0, IsCommutable, X86selects>;
    300 
    301 // Similar to AVX512_maskable but in this case one of the source operands
    302 // ($src1) is already tied to $dst so we just use that for the preserved
    303 // vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
    304 // $src1.
    305 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
    306                                 dag Outs, dag NonTiedIns, string OpcodeStr,
    307                                 string AttSrcAsm, string IntelSrcAsm,
    308                                 dag RHS,
    309                                 bit IsCommutable = 0,
    310                                 bit IsKCommutable = 0,
    311                                 SDNode Select = vselect,
    312                                 bit MaskOnly = 0> :
    313    AVX512_maskable_common<O, F, _, Outs,
    314                           !con((ins _.RC:$src1), NonTiedIns),
    315                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
    316                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
    317                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
    318                           !if(MaskOnly, (null_frag), RHS),
    319                           (Select _.KRCWM:$mask, RHS, _.RC:$src1),
    320                           Select, "", IsCommutable, IsKCommutable>;
    321 
    322 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
    323 // operand differs from the output VT. This requires a bitconvert on
    324 // the preserved vector going into the vselect.
    325 // NOTE: The unmasked pattern is disabled.
    326 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
    327                                      X86VectorVTInfo InVT,
    328                                      dag Outs, dag NonTiedIns, string OpcodeStr,
    329                                      string AttSrcAsm, string IntelSrcAsm,
    330                                      dag RHS, bit IsCommutable = 0> :
    331    AVX512_maskable_common<O, F, OutVT, Outs,
    332                           !con((ins InVT.RC:$src1), NonTiedIns),
    333                           !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
    334                           !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
    335                           OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
    336                           (vselect InVT.KRCWM:$mask, RHS,
    337                            (bitconvert InVT.RC:$src1)),
    338                            vselect, "", IsCommutable>;
    339 
    340 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
    341                                      dag Outs, dag NonTiedIns, string OpcodeStr,
    342                                      string AttSrcAsm, string IntelSrcAsm,
    343                                      dag RHS,
    344                                      bit IsCommutable = 0,
    345                                      bit IsKCommutable = 0,
    346                                      bit MaskOnly = 0> :
    347    AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
    348                         IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
    349                         X86selects, MaskOnly>;
    350 
    351 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
    352                                   dag Outs, dag Ins,
    353                                   string OpcodeStr,
    354                                   string AttSrcAsm, string IntelSrcAsm,
    355                                   list<dag> Pattern> :
    356    AVX512_maskable_custom<O, F, Outs, Ins,
    357                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
    358                           !con((ins _.KRCWM:$mask), Ins),
    359                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
    360                           "$src0 = $dst">;
    361 
    362 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
    363                                        dag Outs, dag NonTiedIns,
    364                                        string OpcodeStr,
    365                                        string AttSrcAsm, string IntelSrcAsm,
    366                                        list<dag> Pattern> :
    367    AVX512_maskable_custom<O, F, Outs,
    368                           !con((ins _.RC:$src1), NonTiedIns),
    369                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
    370                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
    371                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
    372                           "">;
    373 
    374 // Instruction with mask that puts result in mask register,
    375 // like "compare" and "vptest"
    376 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
    377                                   dag Outs,
    378                                   dag Ins, dag MaskingIns,
    379                                   string OpcodeStr,
    380                                   string AttSrcAsm, string IntelSrcAsm,
    381                                   list<dag> Pattern,
    382                                   list<dag> MaskingPattern,
    383                                   bit IsCommutable = 0> {
    384     let isCommutable = IsCommutable in
    385     def NAME: AVX512<O, F, Outs, Ins,
    386                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
    387                                      "$dst, "#IntelSrcAsm#"}",
    388                        Pattern>;
    389 
    390     def NAME#k: AVX512<O, F, Outs, MaskingIns,
    391                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
    392                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
    393                        MaskingPattern>, EVEX_K;
    394 }
    395 
    396 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
    397                                   dag Outs,
    398                                   dag Ins, dag MaskingIns,
    399                                   string OpcodeStr,
    400                                   string AttSrcAsm, string IntelSrcAsm,
    401                                   dag RHS, dag MaskingRHS,
    402                                   bit IsCommutable = 0> :
    403   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
    404                          AttSrcAsm, IntelSrcAsm,
    405                          [(set _.KRC:$dst, RHS)],
    406                          [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
    407 
    408 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
    409                            dag Outs, dag Ins, string OpcodeStr,
    410                            string AttSrcAsm, string IntelSrcAsm,
    411                            dag RHS, bit IsCommutable = 0> :
    412    AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
    413                           !con((ins _.KRCWM:$mask), Ins),
    414                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
    415                           (and _.KRCWM:$mask, RHS), IsCommutable>;
    416 
    417 multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
    418                            dag Outs, dag Ins, string OpcodeStr,
    419                            string AttSrcAsm, string IntelSrcAsm> :
    420    AVX512_maskable_custom_cmp<O, F, Outs,
    421                              Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
    422                              AttSrcAsm, IntelSrcAsm, [], []>;
    423 
    424 // This multiclass generates the unconditional/non-masking, the masking and
    425 // the zero-masking variant of the vector instruction.  In the masking case, the
    426 // perserved vector elements come from a new dummy input operand tied to $dst.
    427 multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
    428                            dag Outs, dag Ins, string OpcodeStr,
    429                            string AttSrcAsm, string IntelSrcAsm,
    430                            dag RHS, dag MaskedRHS,
    431                            bit IsCommutable = 0, SDNode Select = vselect> :
    432    AVX512_maskable_custom<O, F, Outs, Ins,
    433                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
    434                           !con((ins _.KRCWM:$mask), Ins),
    435                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
    436                           [(set _.RC:$dst, RHS)],
    437                           [(set _.RC:$dst,
    438                                 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
    439                           [(set _.RC:$dst,
    440                                 (Select _.KRCWM:$mask, MaskedRHS,
    441                                         _.ImmAllZerosV))],
    442                           "$src0 = $dst", IsCommutable>;
    443 
    444 
    445 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
    446 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
    447 // swizzled by ExecutionDomainFix to pxor.
    448 // We set canFoldAsLoad because this can be converted to a constant-pool
    449 // load of an all-zeros value if folding it would be beneficial.
    450 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    451     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
    452 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
    453                [(set VR512:$dst, (v16i32 immAllZerosV))]>;
    454 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
    455                [(set VR512:$dst, (v16i32 immAllOnesV))]>;
    456 }
    457 
    458 // Alias instructions that allow VPTERNLOG to be used with a mask to create
    459 // a mix of all ones and all zeros elements. This is done this way to force
    460 // the same register to be used as input for all three sources.
    461 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
    462 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
    463                                 (ins VK16WM:$mask), "",
    464                            [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
    465                                                       (v16i32 immAllOnesV),
    466                                                       (v16i32 immAllZerosV)))]>;
    467 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
    468                                 (ins VK8WM:$mask), "",
    469                 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
    470                                            (bc_v8i64 (v16i32 immAllOnesV)),
    471                                            (bc_v8i64 (v16i32 immAllZerosV))))]>;
    472 }
    473 
    474 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    475     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
    476 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
    477                [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
    478 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
    479                [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
    480 }
    481 
    482 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
    483 // This is expanded by ExpandPostRAPseudos.
    484 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    485     isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
    486   def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
    487                           [(set FR32X:$dst, fp32imm0)]>;
    488   def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
    489                           [(set FR64X:$dst, fpimm0)]>;
    490 }
    491 
    492 //===----------------------------------------------------------------------===//
    493 // AVX-512 - VECTOR INSERT
    494 //
    495 
    496 // Supports two different pattern operators for mask and unmasked ops. Allows
    497 // null_frag to be passed for one.
    498 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
    499                                   X86VectorVTInfo To,
    500                                   SDPatternOperator vinsert_insert,
    501                                   SDPatternOperator vinsert_for_mask,
    502                                   X86FoldableSchedWrite sched> {
    503   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    504     defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
    505                    (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
    506                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
    507                    "$src3, $src2, $src1", "$src1, $src2, $src3",
    508                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
    509                                          (From.VT From.RC:$src2),
    510                                          (iPTR imm)),
    511                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
    512                                            (From.VT From.RC:$src2),
    513                                            (iPTR imm))>,
    514                    AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    515     let mayLoad = 1 in
    516     defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
    517                    (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
    518                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
    519                    "$src3, $src2, $src1", "$src1, $src2, $src3",
    520                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
    521                                (From.VT (bitconvert (From.LdFrag addr:$src2))),
    522                                (iPTR imm)),
    523                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
    524                                (From.VT (bitconvert (From.LdFrag addr:$src2))),
    525                                (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
    526                    EVEX_CD8<From.EltSize, From.CD8TupleForm>,
    527                    Sched<[sched.Folded, ReadAfterLd]>;
    528   }
    529 }
    530 
// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper over vinsert_for_size_split for the common case where
// the masked and unmasked instruction forms should match the same
// insert_subvector pattern fragment.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
    537 
// Codegen-only patterns that lower an insert_subvector node to an existing
// VINSERTxx instruction (named by InstrStr, with "rr"/"rm" suffixes appended)
// when the node's element type differs from the instruction's nominal type.
//   From/To - source (small) and destination (large) vector type infos.
//   vinsert_insert - PatFrag matching the insert of this subvector width.
//   INSERT_get_vinsert_imm - XForm mapping the element index captured in
//                            $ins to the instruction's chunk immediate.
//   p - predicate list guarding the patterns.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    // Register-register form.
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Memory form: folds the load of the inserted subvector.
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (bitconvert (From.LdFrag addr:$src2))),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
    557 
// Instantiates one element-domain's worth of VINSERT instructions:
// 32x4 (128-bit chunk of 32-bit elements), 64x4, 64x2 and 32x8 variants,
// at 256-bit (Z256) and 512-bit (Z) vector widths.
//   EltVT32/EltVT64 - 32- and 64-bit element types (f32/f64 or i32/i64).
//   Opcode128/Opcode256 - opcodes for 128-bit and 256-bit subvector inserts.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // 128-bit into 256-bit needs VLX.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  // Hence null_frag for the unmasked pattern: the unmasked operation is
  // still lowered via the 32xN forms above.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}
    601 
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// FP and integer flavors share the template; only opcodes, element types and
// the scheduling class differ.
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
    605 
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
    638 
    639 
// Additional patterns for handling a bitcast between the vselect and the
// insert_subvector: select the masked forms of an existing VINSERTxx
// instruction (InstrStr # "rrk"/"rmk"/"rrkz"/"rmkz") when the write-mask
// vselect is applied in a different vector type (Cast) than the insert
// itself (From inserted into To).
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masking, register source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, folded load source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masking, register source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, folded load source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
    693 
    694 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
    695                              v8f32x_info, vinsert128_insert,
    696                              INSERT_get_vinsert128_imm, [HasVLX]>;
    697 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
    698                              v4f64x_info, vinsert128_insert,
    699                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
    700 
    701 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
    702                              v8i32x_info, vinsert128_insert,
    703                              INSERT_get_vinsert128_imm, [HasVLX]>;
    704 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
    705                              v8i32x_info, vinsert128_insert,
    706                              INSERT_get_vinsert128_imm, [HasVLX]>;
    707 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
    708                              v8i32x_info, vinsert128_insert,
    709                              INSERT_get_vinsert128_imm, [HasVLX]>;
    710 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
    711                              v4i64x_info, vinsert128_insert,
    712                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
    713 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
    714                              v4i64x_info, vinsert128_insert,
    715                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
    716 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
    717                              v4i64x_info, vinsert128_insert,
    718                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
    719 
    720 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
    721                              v16f32_info, vinsert128_insert,
    722                              INSERT_get_vinsert128_imm, [HasAVX512]>;
    723 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
    724                              v8f64_info, vinsert128_insert,
    725                              INSERT_get_vinsert128_imm, [HasDQI]>;
    726 
    727 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
    728                              v16i32_info, vinsert128_insert,
    729                              INSERT_get_vinsert128_imm, [HasAVX512]>;
    730 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
    731                              v16i32_info, vinsert128_insert,
    732                              INSERT_get_vinsert128_imm, [HasAVX512]>;
    733 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
    734                              v16i32_info, vinsert128_insert,
    735                              INSERT_get_vinsert128_imm, [HasAVX512]>;
    736 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
    737                              v8i64_info, vinsert128_insert,
    738                              INSERT_get_vinsert128_imm, [HasDQI]>;
    739 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
    740                              v8i64_info, vinsert128_insert,
    741                              INSERT_get_vinsert128_imm, [HasDQI]>;
    742 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
    743                              v8i64_info, vinsert128_insert,
    744                              INSERT_get_vinsert128_imm, [HasDQI]>;
    745 
    746 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
    747                              v16f32_info, vinsert256_insert,
    748                              INSERT_get_vinsert256_imm, [HasDQI]>;
    749 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
    750                              v8f64_info, vinsert256_insert,
    751                              INSERT_get_vinsert256_imm, [HasAVX512]>;
    752 
    753 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
    754                              v16i32_info, vinsert256_insert,
    755                              INSERT_get_vinsert256_imm, [HasDQI]>;
    756 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
    757                              v16i32_info, vinsert256_insert,
    758                              INSERT_get_vinsert256_imm, [HasDQI]>;
    759 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
    760                              v16i32_info, vinsert256_insert,
    761                              INSERT_get_vinsert256_imm, [HasDQI]>;
    762 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
    763                              v8i64_info, vinsert256_insert,
    764                              INSERT_get_vinsert256_imm, [HasAVX512]>;
    765 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
    766                              v8i64_info, vinsert256_insert,
    767                              INSERT_get_vinsert256_imm, [HasAVX512]>;
    768 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
    769                              v8i64_info, vinsert256_insert,
    770                              INSERT_get_vinsert256_imm, [HasAVX512]>;
    771 
// vinsertps - insert f32 to XMM.
// EVEX-encoded form of insertps (opcode 0x21): the immediate selects the
// source/destination element and a zero mask.
let ExeDomain = SSEPackedSingle in {
// Register form.
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: loads a single f32 and inserts it.
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
}
    788 
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Defines three forms for one VEXTRACTxx instruction:
//   rr  - register extract (maskable via AVX512_maskable_split),
//   mr  - extract directly to memory (unmasked),
//   mrk - masked extract-to-memory (no ISel pattern here; presumably
//         selected elsewhere -- note only encoding/asm are defined).
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
    829 
// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper over vextract_for_size_split for the common case where
// masked and unmasked forms match the same extract_subvector fragment.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
    836 
// Codegen pattern for the alternative types.
// Lowers an extract_subvector node to an existing VEXTRACTxx instruction
// (named by InstrStr, with "rr"/"mr" suffixes) when the node's element type
// differs from the instruction's nominal type.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     // Register form.
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract-to-store form: folds the store of the extracted subvector.
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
    852 
// Instantiates one element-domain's worth of VEXTRACT instructions:
// 32x4, 64x4, 64x2 and 32x8 variants at 256-bit (Z256) and 512-bit (Z)
// source widths; mirror of vinsert_for_type.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  // 128-bit from 256-bit needs VLX.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  // Hence null_frag for the unmasked pattern.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
    897 
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// FP and integer flavors share the template; only opcodes, element types and
// scheduling classes differ.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
    901 
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
    935 
    936 
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Each source index below (in elements) corresponds to a bit offset of 128,
// so the extract becomes "take lane 1 of the low ymm" via the AVX
// VEXTRACT*128 instruction, which the EVEX->VEX pass can keep VEX-encoded.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
    965 
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Same set of patterns as the NoVLX block above, but with VLX available the
// 256-bit-source VEXTRACT*32x4Z256 forms are used instead of the AVX ones.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
    994 
    995 
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// Selects the masked register forms of an existing VEXTRACTxx instruction
// (InstrStr # "rrk"/"rrkz") when the write-mask vselect is applied in a
// different vector type (Cast) than the extract itself (To from From).
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking.
  // NOTE(review): the false operand is written as To.RC:$src0 while the
  // output uses Cast.RC:$src0; this looks benign since every instantiation
  // pairs same-width To/Cast types (identical register class), but
  // Cast.RC:$src0 would be the clearer spelling -- confirm against upstream.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
   1023 
// Instantiations of vextract_for_mask_cast: one per legal (From, To, Cast)
// combination, so masked extracts survive a bitcast between the vselect and
// the extract_subvector node. FP data selects the VEXTRACTFxx forms and
// integer data the VEXTRACTIxx forms.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
   1072 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
   1073                               v2i64x_info, vextract128_extract,
   1074                               EXTRACT_get_vextract128_imm, [HasDQI]>;
   1075 
   1076 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
   1077                               v8f32x_info, vextract256_extract,
   1078                               EXTRACT_get_vextract256_imm, [HasDQI]>;
   1079 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
   1080                               v4f64x_info, vextract256_extract,
   1081                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
   1082 
   1083 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
   1084                               v8i32x_info, vextract256_extract,
   1085                               EXTRACT_get_vextract256_imm, [HasDQI]>;
   1086 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
   1087                               v8i32x_info, vextract256_extract,
   1088                               EXTRACT_get_vextract256_imm, [HasDQI]>;
   1089 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
   1090                               v8i32x_info, vextract256_extract,
   1091                               EXTRACT_get_vextract256_imm, [HasDQI]>;
   1092 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
   1093                               v4i64x_info, vextract256_extract,
   1094                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
   1095 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
   1096                               v4i64x_info, vextract256_extract,
   1097                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
   1098 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
   1099                               v4i64x_info, vextract256_extract,
   1100                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
   1101 
// vextractps - extract 32 bits from XMM
// Register form: extract the dword selected by imm $src2 from the v4f32
// source (viewed as v4i32) into a GR32.
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

// Memory form: store the selected dword directly to a 32-bit memory operand
// (EVEX disp8 compression scales by one dword: CD8<32, CD8VT1>).
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
   1115 
   1116 //===---------------------------------------------------------------------===//
   1117 // AVX-512 BROADCAST
   1118 //---
   1119 // broadcast with a scalar argument.
   1120 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
   1121                             string Name,
   1122                             X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
   1123   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
   1124             (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
   1125              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
   1126   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
   1127                                   (X86VBroadcast SrcInfo.FRC:$src),
   1128                                   DestInfo.RC:$src0)),
   1129             (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
   1130              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
   1131              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
   1132   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
   1133                                   (X86VBroadcast SrcInfo.FRC:$src),
   1134                                   DestInfo.ImmAllZerosV)),
   1135             (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
   1136              DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
   1137 }
   1138 
   1139 // Split version to allow mask and broadcast node to be different types. This
   1140 // helps support the 32x2 broadcasts.
   1141 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
   1142                                      string Name,
   1143                                      SchedWrite SchedRR, SchedWrite SchedRM,
   1144                                      X86VectorVTInfo MaskInfo,
   1145                                      X86VectorVTInfo DestInfo,
   1146                                      X86VectorVTInfo SrcInfo,
   1147                                      SDPatternOperator UnmaskedOp = X86VBroadcast> {
   1148   let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
   1149   defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
   1150                    (outs MaskInfo.RC:$dst),
   1151                    (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
   1152                    (MaskInfo.VT
   1153                     (bitconvert
   1154                      (DestInfo.VT
   1155                       (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
   1156                    (MaskInfo.VT
   1157                     (bitconvert
   1158                      (DestInfo.VT
   1159                       (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
   1160                    T8PD, EVEX, Sched<[SchedRR]>;
   1161   let mayLoad = 1 in
   1162   defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
   1163                    (outs MaskInfo.RC:$dst),
   1164                    (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
   1165                    (MaskInfo.VT
   1166                     (bitconvert
   1167                      (DestInfo.VT (UnmaskedOp
   1168                                    (SrcInfo.ScalarLdFrag addr:$src))))),
   1169                    (MaskInfo.VT
   1170                     (bitconvert
   1171                      (DestInfo.VT (X86VBroadcast
   1172                                    (SrcInfo.ScalarLdFrag addr:$src)))))>,
   1173                    T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
   1174                    Sched<[SchedRM]>;
   1175   }
   1176 
   1177   def : Pat<(MaskInfo.VT
   1178              (bitconvert
   1179               (DestInfo.VT (UnmaskedOp
   1180                             (SrcInfo.VT (scalar_to_vector
   1181                                          (SrcInfo.ScalarLdFrag addr:$src))))))),
   1182             (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
   1183   def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
   1184                           (bitconvert
   1185                            (DestInfo.VT
   1186                             (X86VBroadcast
   1187                              (SrcInfo.VT (scalar_to_vector
   1188                                           (SrcInfo.ScalarLdFrag addr:$src)))))),
   1189                           MaskInfo.RC:$src0)),
   1190             (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
   1191              MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
   1192   def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
   1193                           (bitconvert
   1194                            (DestInfo.VT
   1195                             (X86VBroadcast
   1196                              (SrcInfo.VT (scalar_to_vector
   1197                                           (SrcInfo.ScalarLdFrag addr:$src)))))),
   1198                           MaskInfo.ImmAllZerosV)),
   1199             (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
   1200              MaskInfo.KRCWM:$mask, addr:$src)>;
   1201 }
   1202 
// Helper class to force mask and broadcast result to same type.
// Simply passes DestInfo as both the MaskInfo and DestInfo arguments of the
// split version.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;
   1210 
// FP f64 broadcast: 512-bit form under AVX512 and 256-bit form under VLX,
// each combining the reg/mem broadcast forms with the scalar-argument
// patterns.  No 128-bit variant is defined here.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
  }
}
   1229 
// FP f32 broadcast: like avx512_fp_broadcast_sd but additionally provides a
// 128-bit form (Z128) under VLX.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                         _.info128>,
                 EVEX_V128;
  }
}
// vbroadcastss (f32) and vbroadcastsd (f64); the sd form uses the W1X
// encoding variant.
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                       avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                       avx512vl_f64_info>, VEX_W1X;
   1257 
// Integer broadcast from a general-purpose register: a single maskable
// register form "r" matching (OpNode SrcRC:$src).
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins SrcRC:$src),
                         "vpbroadcast"##_.Suffix, "$src", "$src",
                         (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                         Sched<[SchedRR]>;
}
   1268 
// Byte/word GPR broadcast.  The instruction itself takes a GR32 operand and
// carries no ISel patterns (hasSideEffects = 0, empty pattern lists); the
// explicit Pats below match a broadcast of the narrow register (SrcRC) and
// widen it to GR32 via INSERT_SUBREG before invoking the unmasked, merge-
// masked and zero-masked instruction forms.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                        (outs _.RC:$dst), (ins GR32:$src),
                        !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                        !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                        "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                        "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked form.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked form (passthru in $src0).
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked form.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
   1292 
// VL wrapper for the byte/word GPR broadcast: 512-bit form under prd,
// 256/128-bit forms additionally under VLX.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
   1306 
// VL wrapper for the dword/qword GPR broadcast: 512-bit form under prd,
// 256/128-bit forms additionally under VLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
   1320 
// GPR-source vpbroadcast instantiations.  Byte/word forms need BWI; the
// dword and qword forms share opcode 0x7C and are distinguished by VEX_W.
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
   1330 
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
// Matches a broadcast whose source is a full SrcInfo-sized vector register
// and feeds the register-form broadcast instruction the low xmm subvector
// (EXTRACT_SUBREG sub_xmm) of that register.
multiclass avx512_int_broadcast_rm_lowering<string Name,
                                            X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo,
                                            X86VectorVTInfo ExtInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
                (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
}
   1341 
// VL wrapper for integer broadcast from vector register/memory, plus the
// same-register-class lowering patterns for 256- and 512-bit sources.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128>,
               avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
                                  EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                                 EVEX_V128;
  }
}
   1362 
// Vector-source vpbroadcast instantiations; byte/word forms require BWI.
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512>, VEX_W1X;
   1371 
// Subvector broadcast from memory: a single maskable memory form "rm"
// matching X86SubVBroadcast of a _Src-typed load.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
   1381 
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns (null_frag) so that we only use the DQ instructions
// when masking is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
   1396 
// Broadcasts of zero-extending / truncated loads that the generic patterns
// do not catch.
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  // Same, for zero-extending i16 loads.
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
   1424 
   1425 //===----------------------------------------------------------------------===//
   1426 // AVX-512 BROADCAST SUBVECTORS
   1427 //
   1428 
   1429 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
   1430                        v16i32_info, v4i32x_info>,
   1431                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
   1432 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
   1433                        v16f32_info, v4f32x_info>,
   1434                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
   1435 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
   1436                        v8i64_info, v4i64x_info>, VEX_W,
   1437                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
   1438 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
   1439                        v8f64_info, v4f64x_info>, VEX_W,
   1440                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
   1441 
let Predicates = [HasAVX512] in {
// Other element types reuse the 64x4 instructions via bitcasts, since the
// bit pattern broadcast is the same regardless of element type.
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// A register-source subvector broadcast becomes an insert of the source into
// both halves of the destination.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// 128-bit subvector broadcasts of 64-bit and narrower elements reuse the
// 32x4 instructions via bitcasts.
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
   1517 
let Predicates = [HasVLX] in {
// 256-bit destinations: broadcast a 128-bit (32x4) subvector from memory.
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Other element types reuse the 32x4 instructions via bitcasts.
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v8f32 (v8i32 immAllZerosV))),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;


// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// A register-source subvector broadcast becomes an insert of the source into
// both halves of the destination.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
   1575 
// 256-bit VBROADCAST{I,F}64X2: broadcast a 128-bit (2 x 64-bit) subvector.
// NOTE(review): the defm suffix says Z128 but these encode EVEX_V256 with
// 256-bit destination info (v4i64x/v4f64x) — naming quirk; the rmk/rmkz
// patterns below reference these Z128-suffixed names, so keep them in sync.
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
// Fold a masked vselect of a bitcast subvector-broadcast load into the
// zero-masked (rmkz) or merge-masked (rmk) broadcast instruction.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v4f64 (v8i32 immAllZerosV))),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v4i64 (v8i32 immAllZerosV))),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
   1602 
// 512-bit DQ-only subvector broadcasts: 64x2 (2 x i64/f64 from a 128-bit
// memory/register source) and 32x8 (8 x i32/f32 from a 256-bit source).
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
// 32x8 forms, selected under a 16-lane write mask (kz = zero, k = merge).
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

// 64x2 forms, selected under an 8-lane write mask.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
   1652 
// 32x2 broadcast (DQ-only): Z (512-bit) always, Z256 additionally under VLX.
// The 128-bit source info is passed separately from the destination widths.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}
   1666 
// Integer-only extension of the 32x2 broadcast: adds the Z128 (128-bit)
// variant on top of the Z/Z256 forms inherited from the base multiclass.
// Uses the XMM-width shuffle scheduling classes for the 128-bit form.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}
   1677 
// VBROADCASTI32X2 gets the 128-bit form too; VBROADCASTF32X2 only 256/512-bit.
// Sources are described with the i64/f64 info (one 64-bit = one 32x2 pair).
defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                          avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                          avx512vl_f32_info, avx512vl_f64_info>;
   1682 
// Broadcast of element 0 when the source is already a wider vector register:
// extract the low xmm subregister and use the register form of VBROADCASTSS/SD.
let Predicates = [HasVLX] in {
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
}

// 512-bit destinations need no VLX; sources may be 512- or 256-bit.
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
   1699 
   1700 //===----------------------------------------------------------------------===//
   1701 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
   1702 //---
// Single register-to-register instruction broadcasting a mask register into
// a vector register (X86VBroadcastm node). Register form only - no memory op.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                  EVEX, Sched<[WriteShuffle]>;
}
   1710 
// Instantiate the mask broadcast at all three vector lengths: Z requires
// CDI, Z256/Z128 additionally require VLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}
   1720 
// VPBROADCASTMW2D: 16-bit mask to dword lanes; VPBROADCASTMB2Q: 8-bit mask
// to qword lanes (VEX_W selects the 64-bit element form).
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               avx512vl_i64_info, VK8>, VEX_W;
   1725 
   1726 //===----------------------------------------------------------------------===//
   1727 // -- VPERMI2 - 3 source operands form --
// VPERMI2* core forms: 3-source permute where the index operand ($src1) is
// tied to the destination ("$src1 = $dst") and is overwritten by the result.
// The index may have a different VT (IdxVT) than the data, hence the _cast
// maskable wrapper. hasSideEffects = 0: no pattern on the masked variants.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Memory form: third source is loaded (and bitcast to the data VT).
  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                   (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   1748 
// VPERMI2* broadcast-memory form (EVEX_B): the third source is a scalar
// load broadcast to the full vector width ({1toN} assembly syntax).
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
               IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}
   1763 
// VPERMI2 32/64-bit element sizes: 512-bit always, 128/256-bit under VLX.
// Each width gets both the reg/mem forms and the broadcast-memory form.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  }
}
   1783 
// Byte/word VPERMI2 variants: gated on an extra predicate (BWI or VBMI) and
// with no broadcast-memory form (no byte/word embedded broadcast).
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>,  EVEX_V256;
  }
}
   1799 
// VPERMI2 instantiations. D/Q and PS/PD share opcodes (0x76/0x77), with
// VEX_W selecting the 64-bit element forms; W needs BWI, B needs VBMI.
// The fp forms use integer index info, hence the bitcast lowering patterns
// that follow elsewhere in this file.
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
   1814 
// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
// Each pattern matches a masked VPERMI2 where the tied index/passthru operand
// arrives bitcast from CastVT (e.g. vXi64), and selects the masked rrk/rmk/
// rmbk instruction directly, avoiding a failed isel on the bitcast.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  // Register form.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86VPermt2 (_.VT _.RC:$src2),
                                         (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                             (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  // Full-width memory form.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86VPermt2 _.RC:$src2,
                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
                                         (_.LdFrag addr:$src3)),
                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  // Broadcast-memory form.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86VPermt2 _.RC:$src2,
                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
                                         (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}
   1841 
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the PS forms are covered here, at all three vector widths.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
   1846 
// VPERMT2
// Like VPERMI2 but the tied operand ($src1 = $dst) is the first data table
// and the index is an untied input ($src2); data and index share element
// count so no _cast wrapper is needed.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Memory form: second table operand loaded from memory.
  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (bitconvert (_.LdFrag addr:$src3)))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// VPERMT2* broadcast-memory form (EVEX_B): second table operand is a scalar
// load broadcast to the full vector ({1toN} assembly syntax).
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
               IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}
   1879 
// VPERMT2 32/64-bit element sizes: 512-bit always, 128/256-bit under VLX,
// each with the reg/mem forms plus the broadcast-memory form.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>, EVEX_V256;
  }
}
   1899 
// Byte/word VPERMT2 variants: extra predicate gate (BWI or VBMI), no
// broadcast-memory form (no byte/word embedded broadcast).
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}
   1914 
// VPERMT2 instantiations, mirroring the VPERMI2 set with opcodes 0x7D-0x7F.
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
   1929 
   1930 //===----------------------------------------------------------------------===//
   1931 // AVX-512 - BLEND using mask
   1932 //
   1933 
// Mask-driven blend forms: plain, merge-masked (k) and zero-masked (kz),
// each with register and memory variants. All are pattern-less
// (hasSideEffects = 0, empty DAG lists); selection happens elsewhere.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, ReadAfterLd]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, ReadAfterLd]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
  }
  }
}
// Broadcast-memory (EVEX_B, {1toN}) variants of the blend: masked, zero-
// masked, and unmasked. Also pattern-less; hasSideEffects = 0, mayLoad = 1.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, ReadAfterLd]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   2002 
// Dword/qword blends: all three vector lengths (128/256 under VLX), each
// with both the regular and the broadcast-memory form.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                                 EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                      EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                      EVEX_V128;
  }
}
   2018 
// Byte/word blends: require BWI (plus VLX for the short forms) and have no
// broadcast-memory form (no byte/word embedded broadcast).
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                               EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                  EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                  EVEX_V128;
  }
}
   2032 
// Blend instantiations: PS/PD and D/Q share opcodes (0x65/0x64) with VEX_W
// selecting the 64-bit element forms; B/W share 0x66 the same way.
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
   2045 
   2046 //===----------------------------------------------------------------------===//
   2047 // Compare Instructions
   2048 //===----------------------------------------------------------------------===//
   2049 
   2050 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
   2051 
   2052 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
   2053                              X86FoldableSchedWrite sched> {
   2054   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
   2055                       (outs _.KRC:$dst),
   2056                       (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
   2057                       "vcmp${cc}"#_.Suffix,
   2058                       "$src2, $src1", "$src1, $src2",
   2059                       (OpNode (_.VT _.RC:$src1),
   2060                               (_.VT _.RC:$src2),
   2061                               imm:$cc)>, EVEX_4V, Sched<[sched]>;
   2062   let mayLoad = 1 in
   2063   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
   2064                     (outs _.KRC:$dst),
   2065                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
   2066                     "vcmp${cc}"#_.Suffix,
   2067                     "$src2, $src1", "$src1, $src2",
   2068                     (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
   2069                         imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
   2070                     Sched<[sched.Folded, ReadAfterLd]>;
   2071 
   2072   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
   2073                      (outs _.KRC:$dst),
   2074                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
   2075                      "vcmp${cc}"#_.Suffix,
   2076                      "{sae}, $src2, $src1", "$src1, $src2, {sae}",
   2077                      (OpNodeRnd (_.VT _.RC:$src1),
   2078                                 (_.VT _.RC:$src2),
   2079                                 imm:$cc,
   2080                                 (i32 FROUND_NO_EXC))>,
   2081                      EVEX_4V, EVEX_B, Sched<[sched]>;
   2082   // Accept explicit immediate argument form instead of comparison code.
   2083   let isAsmParserOnly = 1, hasSideEffects = 0 in {
   2084     defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
   2085                         (outs VK1:$dst),
   2086                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
   2087                         "vcmp"#_.Suffix,
   2088                         "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
   2089                         Sched<[sched]>, NotMemoryFoldable;
   2090   let mayLoad = 1 in
   2091     defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
   2092                         (outs _.KRC:$dst),
   2093                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
   2094                         "vcmp"#_.Suffix,
   2095                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
   2096                         EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
   2097                         Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
   2098 
   2099     defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
   2100                        (outs _.KRC:$dst),
   2101                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
   2102                        "vcmp"#_.Suffix,
   2103                        "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
   2104                        EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
   2105   }// let isAsmParserOnly = 1, hasSideEffects = 0
   2106 
   2107   let isCodeGenOnly = 1 in {
   2108     let isCommutable = 1 in
   2109     def rr : AVX512Ii8<0xC2, MRMSrcReg,
   2110                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
   2111                 !strconcat("vcmp${cc}", _.Suffix,
   2112                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
   2113                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
   2114                                           _.FRC:$src2,
   2115                                           imm:$cc))]>,
   2116                 EVEX_4V, Sched<[sched]>;
   2117     def rm : AVX512Ii8<0xC2, MRMSrcMem,
   2118               (outs _.KRC:$dst),
   2119               (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
   2120               !strconcat("vcmp${cc}", _.Suffix,
   2121                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
   2122               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
   2123                                         (_.ScalarLdFrag addr:$src2),
   2124                                         imm:$cc))]>,
   2125               EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
   2126               Sched<[sched.Folded, ReadAfterLd]>;
   2127   }
   2128 }
   2129 
    2130 let Predicates = [HasAVX512] in {
           // Instantiate the scalar FP compare multiclass for VCMPSS (f32, XS
           // prefix) and VCMPSD (f64, XD prefix + VEX_W); both write a k-register.
    2131   let ExeDomain = SSEPackedSingle in
    2132   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
    2133                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
    2134   let ExeDomain = SSEPackedDouble in
    2135   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
    2136                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
    2137 }
   2138 
    2139 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
    2140                               X86FoldableSchedWrite sched, X86VectorVTInfo _,
    2141                               bit IsCommutable> {
           // Packed integer compare producing a mask in a k-register. Emits four
           // forms: rr (reg,reg), rm (reg,mem), and the masked rrk/rmk variants
           // whose pattern ANDs the compare result with the $mask k-register.
    2142   let isCommutable = IsCommutable in
    2143   def rr : AVX512BI<opc, MRMSrcReg,
    2144              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
    2145              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
    2146              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
    2147              EVEX_4V, Sched<[sched]>;
             // Memory form: second operand loaded (and bitconverted to the vector VT).
    2148   def rm : AVX512BI<opc, MRMSrcMem,
    2149              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
    2150              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
    2151              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
    2152                                        (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
    2153              EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
    2154   let isCommutable = IsCommutable in
    2155   def rrk : AVX512BI<opc, MRMSrcReg,
    2156               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
    2157               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
    2158                           "$dst {${mask}}, $src1, $src2}"),
    2159               [(set _.KRC:$dst, (and _.KRCWM:$mask,
    2160                                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
    2161               EVEX_4V, EVEX_K, Sched<[sched]>;
    2162   def rmk : AVX512BI<opc, MRMSrcMem,
    2163               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
    2164               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
    2165                           "$dst {${mask}}, $src1, $src2}"),
    2166               [(set _.KRC:$dst, (and _.KRCWM:$mask,
    2167                                    (OpNode (_.VT _.RC:$src1),
    2168                                        (_.VT (bitconvert
    2169                                               (_.LdFrag addr:$src2))))))]>,
    2170               EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
    2171 }
   2172 
    2173 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
    2174                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
    2175                                   bit IsCommutable> :
    2176            avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
           // Extends avx512_icmp_packed with embedded-broadcast (EVEX.b) memory
           // forms: rmb (unmasked) and rmbk (masked) broadcast one scalar element
           // to the full vector before comparing.
    2177   def rmb : AVX512BI<opc, MRMSrcMem,
    2178               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
    2179               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
    2180                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
    2181               [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
    2182                               (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
    2183               EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
    2184   def rmbk : AVX512BI<opc, MRMSrcMem,
    2185                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
    2186                                        _.ScalarMemOp:$src2),
    2187                !strconcat(OpcodeStr,
    2188                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
    2189                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
    2190                [(set _.KRC:$dst, (and _.KRCWM:$mask,
    2191                                       (OpNode (_.VT _.RC:$src1),
    2192                                         (X86VBroadcast
    2193                                           (_.ScalarLdFrag addr:$src2)))))]>,
    2194                EVEX_4V, EVEX_K, EVEX_B,
    2195                Sched<[sched.Folded, ReadAfterLd]>;
    2196 }
   2197 
    2198 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
    2199                                  X86SchedWriteWidths sched,
    2200                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
    2201                                  bit IsCommutable = 0> {
           // Instantiates avx512_icmp_packed for all three vector lengths: 512-bit
           // under predicate prd, and the 256/128-bit forms additionally gated on
           // HasVLX.
    2202   let Predicates = [prd] in
    2203   defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
    2204                               VTInfo.info512, IsCommutable>, EVEX_V512;
    2205 
    2206   let Predicates = [prd, HasVLX] in {
    2207     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
    2208                                    VTInfo.info256, IsCommutable>, EVEX_V256;
    2209     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
    2210                                    VTInfo.info128, IsCommutable>, EVEX_V128;
    2211   }
    2212 }
   2213 
    2214 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
    2215                                      PatFrag OpNode, X86SchedWriteWidths sched,
    2216                                      AVX512VLVectorVTInfo VTInfo,
    2217                                      Predicate prd, bit IsCommutable = 0> {
           // Same as avx512_icmp_packed_vl, but builds the broadcast-capable
           // (rmb) variant at each vector length.
    2218   let Predicates = [prd] in
    2219   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
    2220                                   VTInfo.info512, IsCommutable>, EVEX_V512;
    2221 
    2222   let Predicates = [prd, HasVLX] in {
    2223     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
    2224                                        VTInfo.info256, IsCommutable>, EVEX_V256;
    2225     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
    2226                                        VTInfo.info128, IsCommutable>, EVEX_V128;
    2227   }
    2228 }
   2229 
    2230 // This fragment treats X86cmpm as commutable to help match loads in both
    2231 // operands for PCMPEQ.
    2232 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
    2233 def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
    2234                            (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
         // Signed greater-than fragment; uses the plain (non-commutative) setcc
         // node since GT is not symmetric in its operands.
    2235 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
    2236                          (setcc node:$src1, node:$src2, SETGT)>;
   2237 
    2238 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
    2239 // increase the pattern complexity the way an immediate would.
    2240 let AddedComplexity = 2 in {
    2241 // FIXME: Is there a better scheduler class for VPCMP?
         // EQ forms pass IsCommutable = 1 (via the commutable SETEQ fragment);
         // the GT forms are not commutable. The D/Q element sizes use the
         // broadcast-capable (rmb) multiclass.
    2242 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
    2243                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
    2244                 EVEX_CD8<8, CD8VF>, VEX_WIG;
    2245 
    2246 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
    2247                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
    2248                 EVEX_CD8<16, CD8VF>, VEX_WIG;
    2249 
    2250 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
    2251                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
    2252                 EVEX_CD8<32, CD8VF>;
    2253 
    2254 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
    2255                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
    2256                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
    2257 
    2258 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
    2259                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
    2260                 EVEX_CD8<8, CD8VF>, VEX_WIG;
    2261 
    2262 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
    2263                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
    2264                 EVEX_CD8<16, CD8VF>, VEX_WIG;
    2265 
    2266 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
    2267                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
    2268                 EVEX_CD8<32, CD8VF>;
    2269 
    2270 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
    2271                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
    2272                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
    2273 }
   2274 
    2275 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
    2276                           PatFrag CommFrag, X86FoldableSchedWrite sched,
    2277                           X86VectorVTInfo _, string Name> {
           // VPCMP with an explicit condition-code immediate. Frag matches the
           // setcc and its OperandTransform converts the CondCode into the imm;
           // CommFrag is the operand-swapped variant used by the trailing
           // load-commute patterns.
    2278   let isCommutable = 1 in
    2279   def rri : AVX512AIi8<opc, MRMSrcReg,
    2280              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
    2281              !strconcat("vpcmp${cc}", Suffix,
    2282                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
    2283              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
    2284                                                 (_.VT _.RC:$src2),
    2285                                                 cond)))]>,
    2286              EVEX_4V, Sched<[sched]>;
    2287   def rmi : AVX512AIi8<opc, MRMSrcMem,
    2288              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
    2289              !strconcat("vpcmp${cc}", Suffix,
    2290                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
    2291              [(set _.KRC:$dst, (_.KVT
    2292                                 (Frag:$cc
    2293                                  (_.VT _.RC:$src1),
    2294                                  (_.VT (bitconvert (_.LdFrag addr:$src2))),
    2295                                  cond)))]>,
    2296              EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
             // Masked forms AND the compare result with the $mask k-register.
    2297   let isCommutable = 1 in
    2298   def rrik : AVX512AIi8<opc, MRMSrcReg,
    2299               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
    2300                                       AVX512ICC:$cc),
    2301               !strconcat("vpcmp${cc}", Suffix,
    2302                          "\t{$src2, $src1, $dst {${mask}}|",
    2303                          "$dst {${mask}}, $src1, $src2}"),
    2304               [(set _.KRC:$dst, (and _.KRCWM:$mask,
    2305                                      (_.KVT (Frag:$cc (_.VT _.RC:$src1),
    2306                                                       (_.VT _.RC:$src2),
    2307                                                       cond))))]>,
    2308               EVEX_4V, EVEX_K, Sched<[sched]>;
    2309   def rmik : AVX512AIi8<opc, MRMSrcMem,
    2310               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
    2311                                     AVX512ICC:$cc),
    2312               !strconcat("vpcmp${cc}", Suffix,
    2313                          "\t{$src2, $src1, $dst {${mask}}|",
    2314                          "$dst {${mask}}, $src1, $src2}"),
    2315               [(set _.KRC:$dst, (and _.KRCWM:$mask,
    2316                                      (_.KVT
    2317                                       (Frag:$cc
    2318                                        (_.VT _.RC:$src1),
    2319                                        (_.VT (bitconvert
    2320                                               (_.LdFrag addr:$src2))),
    2321                                        cond))))]>,
    2322               EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
    2323 
    2324   // Accept explicit immediate argument form instead of comparison code.
    2325   let isAsmParserOnly = 1, hasSideEffects = 0 in {
    2326     def rri_alt : AVX512AIi8<opc, MRMSrcReg,
    2327                (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
    2328                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
    2329                           "$dst, $src1, $src2, $cc}"), []>,
    2330                EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
    2331     let mayLoad = 1 in
    2332     def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
    2333                (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
    2334                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
    2335                           "$dst, $src1, $src2, $cc}"), []>,
    2336                EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
    2337     def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
    2338                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
    2339                                        u8imm:$cc),
    2340                !strconcat("vpcmp", Suffix,
    2341                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
    2342                           "$dst {${mask}}, $src1, $src2, $cc}"), []>,
    2343                EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
    2344     let mayLoad = 1 in
    2345     def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
    2346                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
    2347                                        u8imm:$cc),
    2348                !strconcat("vpcmp", Suffix,
    2349                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
    2350                           "$dst {${mask}}, $src1, $src2, $cc}"), []>,
    2351                EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
    2352                NotMemoryFoldable;
    2353   }
    2354 
           // Patterns folding a load in the FIRST setcc operand into rmi/rmik by
           // swapping operands; CommFrag.OperandTransform emits the swapped imm.
    2355   def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
    2356                                  (_.VT _.RC:$src1), cond)),
    2357             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
    2358              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
    2359 
    2360   def : Pat<(and _.KRCWM:$mask,
    2361                  (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
    2362                                       (_.VT _.RC:$src1), cond))),
    2363             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
    2364              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
    2365              (CommFrag.OperandTransform $cc))>;
    2366 }
   2367 
    2368 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
    2369                               PatFrag CommFrag, X86FoldableSchedWrite sched,
    2370                               X86VectorVTInfo _, string Name> :
    2371            avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
           // Extends avx512_icmp_cc with embedded-broadcast (EVEX.b) forms:
           // rmib/rmibk broadcast one scalar element before the compare.
    2372   def rmib : AVX512AIi8<opc, MRMSrcMem,
    2373              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
    2374                                      AVX512ICC:$cc),
    2375              !strconcat("vpcmp${cc}", Suffix,
    2376                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
    2377                         "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
    2378              [(set _.KRC:$dst, (_.KVT (Frag:$cc
    2379                                        (_.VT _.RC:$src1),
    2380                                        (X86VBroadcast
    2381                                         (_.ScalarLdFrag addr:$src2)),
    2382                                        cond)))]>,
    2383              EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
    2384   def rmibk : AVX512AIi8<opc, MRMSrcMem,
    2385               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
    2386                                        _.ScalarMemOp:$src2, AVX512ICC:$cc),
    2387               !strconcat("vpcmp${cc}", Suffix,
    2388                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
    2389                        "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
    2390               [(set _.KRC:$dst, (and _.KRCWM:$mask,
    2391                                      (_.KVT (Frag:$cc
    2392                                              (_.VT _.RC:$src1),
    2393                                              (X86VBroadcast
    2394                                               (_.ScalarLdFrag addr:$src2)),
    2395                                              cond))))]>,
    2396               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
    2397 
    2398   // Accept explicit immediate argument form instead of comparison code.
    2399   let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    2400     def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
    2401                (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
    2402                                        u8imm:$cc),
    2403                !strconcat("vpcmp", Suffix,
    2404                    "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
    2405                    "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
    2406                EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
    2407                NotMemoryFoldable;
    2408     def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
    2409                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
    2410                                        _.ScalarMemOp:$src2, u8imm:$cc),
    2411                !strconcat("vpcmp", Suffix,
    2412                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
    2413                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
    2414                EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
    2415                NotMemoryFoldable;
    2416   }
    2417 
           // Commute a broadcast load in the first operand into rmib/rmibk with a
           // swapped condition-code immediate.
    2418   def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
    2419                     (_.VT _.RC:$src1), cond)),
    2420             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
    2421              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
    2422 
    2423   def : Pat<(and _.KRCWM:$mask,
    2424                  (_.KVT (CommFrag:$cc (X86VBroadcast
    2425                                        (_.ScalarLdFrag addr:$src2)),
    2426                                       (_.VT _.RC:$src1), cond))),
    2427             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
    2428              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
    2429              (CommFrag.OperandTransform $cc))>;
    2430 }
   2431 
    2432 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
    2433                              PatFrag CommFrag, X86SchedWriteWidths sched,
    2434                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
           // Instantiates avx512_icmp_cc at 512/256/128 bits; the sub-512 forms
           // additionally require HasVLX. NAME is forwarded so the multiclass's
           // commute patterns can !cast to the generated instruction names.
    2435   let Predicates = [prd] in
    2436   defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
    2437                           VTInfo.info512, NAME>, EVEX_V512;
    2438 
    2439   let Predicates = [prd, HasVLX] in {
    2440     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
    2441                                VTInfo.info256, NAME>, EVEX_V256;
    2442     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
    2443                                VTInfo.info128, NAME>, EVEX_V128;
    2444   }
    2445 }
   2446 
    2447 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
    2448                                  PatFrag CommFrag, X86SchedWriteWidths sched,
    2449                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
           // Same as avx512_icmp_cc_vl, but builds the broadcast-capable (rmb)
           // variant at each vector length.
    2450   let Predicates = [prd] in
    2451   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
    2452                               VTInfo.info512, NAME>, EVEX_V512;
    2453 
    2454   let Predicates = [prd, HasVLX] in {
    2455     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
    2456                                     VTInfo.info256, NAME>, EVEX_V256;
    2457     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
    2458                                    VTInfo.info128, NAME>, EVEX_V128;
    2459   }
    2460 }
   2461 
         // Transform: convert the setcc CondCode operand into the VPCMP
         // condition-code immediate.
    2462 def X86pcmpm_imm : SDNodeXForm<setcc, [{
    2463   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
    2464   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
    2465   return getI8Imm(SSECC, SDLoc(N));
    2466 }]>;
    2467 
    2468 // Swapped operand version of the above.
    2469 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
    2470   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
    2471   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
    2472   SSECC = X86::getSwappedVPCMPImm(SSECC);
    2473   return getI8Imm(SSECC, SDLoc(N));
    2474 }]>;
    2475 
         // Signed integer compare fragment (rejects unsigned CondCodes).
    2476 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
    2477                        (setcc node:$src1, node:$src2, node:$cc), [{
    2478   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
    2479   return !ISD::isUnsignedIntSetCC(CC);
    2480 }], X86pcmpm_imm>;
    2481 
    2482 // Same as above, but commutes immediate. Use for load folding.
    2483 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
    2484                                (setcc node:$src1, node:$src2, node:$cc), [{
    2485   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
    2486   return !ISD::isUnsignedIntSetCC(CC);
    2487 }], X86pcmpm_imm_commute>;
    2488 
         // Unsigned integer compare fragment (accepts only unsigned CondCodes).
    2489 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
    2490                         (setcc node:$src1, node:$src2, node:$cc), [{
    2491   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
    2492   return ISD::isUnsignedIntSetCC(CC);
    2493 }], X86pcmpm_imm>;
    2494 
    2495 // Same as above, but commutes immediate. Use for load folding.
    2496 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
    2497                                 (setcc node:$src1, node:$src2, node:$cc), [{
    2498   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
    2499   return ISD::isUnsignedIntSetCC(CC);
    2500 }], X86pcmpm_imm_commute>;
   2501 
    2502 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
         // VPCMP* take the signed fragments, VPCMPU* the unsigned ones; the
         // 32/64-bit element forms use the broadcast-capable (rmb) multiclass.
    2503 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
    2504                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
    2505                                 EVEX_CD8<8, CD8VF>;
    2506 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
    2507                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
    2508                                  EVEX_CD8<8, CD8VF>;
    2509 
    2510 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
    2511                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
    2512                                 VEX_W, EVEX_CD8<16, CD8VF>;
    2513 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
    2514                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
    2515                                  VEX_W, EVEX_CD8<16, CD8VF>;
    2516 
    2517 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
    2518                                     SchedWriteVecALU, avx512vl_i32_info,
    2519                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
    2520 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
    2521                                      SchedWriteVecALU, avx512vl_i32_info,
    2522                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
    2523 
    2524 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
    2525                                     SchedWriteVecALU, avx512vl_i64_info,
    2526                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
    2527 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
    2528                                      SchedWriteVecALU, avx512vl_i64_info,
    2529                                      HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
   2530 
    2531 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
    2532                               string Name> {
           // Packed FP VCMP with condition code: rri (reg,reg), rmi (reg,mem)
           // and rmbi (reg, broadcast-mem, EVEX.b), plus asm-parser-only "_alt"
           // forms taking an explicit u8 immediate.
    2533   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
    2534                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
    2535                    "vcmp${cc}"#_.Suffix,
    2536                    "$src2, $src1", "$src1, $src2",
    2537                    (X86cmpm (_.VT _.RC:$src1),
    2538                          (_.VT _.RC:$src2),
    2539                            imm:$cc), 1>,
    2540                    Sched<[sched]>;
    2541 
    2542   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
    2543                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
    2544                 "vcmp${cc}"#_.Suffix,
    2545                 "$src2, $src1", "$src1, $src2",
    2546                 (X86cmpm (_.VT _.RC:$src1),
    2547                         (_.VT (bitconvert (_.LdFrag addr:$src2))),
    2548                         imm:$cc)>,
    2549                 Sched<[sched.Folded, ReadAfterLd]>;
    2550 
    2551   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
    2552                 (outs _.KRC:$dst),
    2553                 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
    2554                 "vcmp${cc}"#_.Suffix,
    2555                 "${src2}"##_.BroadcastStr##", $src1",
    2556                 "$src1, ${src2}"##_.BroadcastStr,
    2557                 (X86cmpm (_.VT _.RC:$src1),
    2558                         (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
    2559                         imm:$cc)>,
    2560                 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
    2561   // Accept explicit immediate argument form instead of comparison code.
    2562   let isAsmParserOnly = 1, hasSideEffects = 0 in {
    2563     defm  rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
    2564                          (outs _.KRC:$dst),
    2565                          (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
    2566                          "vcmp"#_.Suffix,
    2567                          "$cc, $src2, $src1", "$src1, $src2, $cc">,
    2568                          Sched<[sched]>, NotMemoryFoldable;
    2569 
    2570     let mayLoad = 1 in {
    2571       defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
    2572                              (outs _.KRC:$dst),
    2573                              (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
    2574                              "vcmp"#_.Suffix,
    2575                              "$cc, $src2, $src1", "$src1, $src2, $cc">,
    2576                              Sched<[sched.Folded, ReadAfterLd]>,
    2577                              NotMemoryFoldable;
    2578 
    2579       defm  rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
    2580                          (outs _.KRC:$dst),
    2581                          (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
    2582                          "vcmp"#_.Suffix,
    2583                          "$cc, ${src2}"##_.BroadcastStr##", $src1",
    2584                          "$src1, ${src2}"##_.BroadcastStr##", $cc">,
    2585                          EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
    2586                          NotMemoryFoldable;
    2587     }
    2588   }
    2589 
    2590   // Patterns for selecting with loads in other operand.
    2591   def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
    2592                      CommutableCMPCC:$cc),
    2593             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
    2594                                                       imm:$cc)>;
    2595 
    2596   def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
    2597                                          (_.VT _.RC:$src1),
    2598                                          CommutableCMPCC:$cc)),
    2599             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
    2600                                                        _.RC:$src1, addr:$src2,
    2601                                                        imm:$cc)>;
    2602 
    2603   def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
    2604                      (_.VT _.RC:$src1), CommutableCMPCC:$cc),
    2605             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
    2606                                                        imm:$cc)>;
    2607 
    2608   def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
    2609                                           (_.ScalarLdFrag addr:$src2)),
    2610                                          (_.VT _.RC:$src1),
    2611                                          CommutableCMPCC:$cc)),
    2612             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
    2613                                                         _.RC:$src1, addr:$src2,
    2614                                                         imm:$cc)>;
    2615 }
   2616 
    2617 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
    2618   // Comparison code form (VCMP[EQ/LT/LE/...]), with {sae}
    2619   defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
    2620                      (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
    2621                      "vcmp${cc}"#_.Suffix,
    2622                      "{sae}, $src2, $src1", "$src1, $src2, {sae}",
    2623                      (X86cmpmRnd (_.VT _.RC:$src1),
    2624                                     (_.VT _.RC:$src2),
    2625                                     imm:$cc,
    2626                                 (i32 FROUND_NO_EXC))>,
    2627                      EVEX_B, Sched<[sched]>;
    2628 
             // Asm-parser-only form taking an explicit u8 immediate.
    2629   let isAsmParserOnly = 1, hasSideEffects = 0 in {
    2630     defm  rrib_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
    2631                          (outs _.KRC:$dst),
    2632                          (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
    2633                          "vcmp"#_.Suffix,
    2634                          "$cc, {sae}, $src2, $src1",
    2635                          "$src1, $src2, {sae}, $cc">,
    2636                          EVEX_B, Sched<[sched]>, NotMemoryFoldable;
    2637    }
    2638 }
   2639 
// Instantiates the FP vector compare forms for all three vector widths.
// The 512-bit form (plus its {sae} variant) needs only AVX512F; the 128/256-bit
// forms additionally require VLX.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [HasAVX512,HasVLX] in {
   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}
   2651 
// VCMPPD/VCMPPS: packed double/single FP compares into a mask register.
// VEX_W + CD8<64> for the f64 form, CD8<32> for the f32 form.
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
   2656 
// Patterns to select fp compares with load as first operand.
// The operands are swapped relative to the instruction's rm form, which is
// only valid for commutable condition codes (CommutableCMPCC).
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}
   2667 
   2668 // ----------------------------------------------------------------
   2669 // FPClass
   2670 //handle fpclass instruction  mask =  op(reg_scalar,imm)
   2671 //                                    op(mem_scalar,imm)
// Scalar VFPCLASS: mask = op(reg_scalar, imm) / op(mem_scalar, imm).
// Defines the unmasked and zeroing-masked ("k") register and memory forms.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
      // reg form: mask = OpNode(src1, imm)
      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2)))]>,
                      Sched<[sched]>;
      // reg form with writemask: the result is ANDed with $mask.
      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                      (i32 imm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
    // mem form: scalar operand loaded through the intrinsic-style mem pattern.
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode _.ScalarIntMemCPat:$src1,
                                  (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, ReadAfterLd]>;
    // mem form with writemask.
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
                        (OpNode _.ScalarIntMemCPat:$src1,
                            (i32 imm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   2708 
   2709 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
   2710 //                                  fpclass(reg_vec, mem_vec, imm)
   2711 //                                  fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASS: mask = fpclass(reg_vec, imm)
//                         fpclass(mem_vec, imm)
//                         fpclass(broadcast(eltVt), imm)
// `mem` and `broadcast` are mnemonic-suffix strings (e.g. "{x}"/"{l}") used to
// disambiguate operand size in the assembly string.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem, string broadcast>{
  let ExeDomain = _.ExeDomain in {
  // reg form.
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2)))]>,
                      Sched<[sched]>;
  // reg form with writemask (result ANDed with $mask).
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
  // full-vector mem form.
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                     (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  // full-vector mem form with writemask.
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 imm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
  // broadcast mem form (EVEX.B): one scalar element splatted to the vector.
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                                      _.BroadcastStr##", $dst|$dst, ${src1}"
                                                  ##_.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2)))]>,
                    EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  // broadcast mem form with writemask.
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                          _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                                                   _.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2))))]>,
                    EVEX_B, EVEX_K,  Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   2768 
// Instantiates avx512_vector_fpclass for 512/256/128-bit widths.
// The "{z}"/"{y}"/"{x}" strings become the memory-size suffixes in the
// assembly mnemonic; 128/256-bit forms additionally require VLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, SDNode OpNode,
                                     X86SchedWriteWidths sched, Predicate prd,
                                     string broadcast>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, "{z}", broadcast>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
                                      _.info128, "{x}", broadcast>, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
                                      _.info256, "{y}", broadcast>, EVEX_V256;
  }
}
   2784 
// Top-level FPCLASS generator: packed PS/PD variants (opcVec) plus scalar
// SS/SD variants (opcScalar). "{l}"/"{q}" are the broadcast-size suffixes.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, SDNode VecOpNode,
                                 SDNode ScalarOpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
                                      VecOpNode, sched, prd, "{l}">,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
                                      VecOpNode, sched, prd, "{q}">,
                                      EVEX_CD8<64, CD8VF> , VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f32x_info, prd>,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f64x_info, prd>,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}
   2802 
// VFPCLASS (DQI): classify FP values into a mask. 0x66 = packed opcode,
// 0x67 = scalar opcode.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
                                      X86Vfpclasss, SchedWriteFCmp, HasDQI>,
                                      AVX512AIi8Base, EVEX;
   2806 
   2807 //-----------------------------------------------------------------
   2808 // Mask register copy, including
   2809 // - copy between mask registers
   2810 // - load/store mask registers
   2811 // - copy from GPR to mask register and vice versa
   2812 //
// Mask register moves: k<-k (kk), k<-mem (km), mem<-k (mk).
// Only the memory forms carry selection patterns; the kk form is a pure
// register move (isMoveReg, no side effects).
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                         string OpcodeStr, RegisterClass KRC,
                         ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}
   2829 
// Mask <-> GPR moves (kr: k<-GPR, rk: GPR<-k). No patterns here — the
// copies are selected via the bitconvert patterns below.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                             string OpcodeStr,
                             RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}
   2842 
// KMOVB requires DQI; KMOVW is baseline AVX512F; KMOVD/KMOVQ require BWI.
// Note the GPR forms of KMOVB/KMOVW use GR32 (there is no 8/16-bit GPR form).
let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  // KMOVD/KMOVQ use different prefixes for the k<->k/mem forms (PD/PS + VEX_W)
  // vs. the GPR forms (XD), so they are split across two defms.
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
   2863 
// GR from/to mask register.
// 8/16-bit GPRs are widened to 32 bits (INSERT_SUBREG into an IMPLICIT_DEF)
// before the cross-class copy, since the k<->GPR moves operate on GR32/GR64.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext of the mask bits: use the KMOV*rk forms, which zero the upper bits.
// anyext: a plain cross-class copy suffices (upper bits are don't-care).
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

// 32/64-bit masks match GPR width exactly, so a cross-class copy is enough.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
   2893 
// Load/store kreg.
// Sub-byte mask types (v1i1/v2i1/v4i1) have no dedicated load/store; they are
// handled through KMOVB (DQI) and a copy to the narrow mask class.
let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
   2906 
// Without DQI there is no KMOVB, so an i8 mask load goes through a
// zero-extending GPR load and a cross-class copy into VK8.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}
   2911 
// scalar_to_vector GPR -> mask lowering for every mask width: a GR32 copies
// straight into the mask class; a GR8 is first widened to 32 bits.
let Predicates = [HasAVX512] in {
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Insert a single GR8-sourced bit into an all-zero v16i1: widen, mask the
  // low bit with AND, then move into a k-register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}
   2936 
   2937 // Mask unary operation
   2938 // - KNOT
// Mask unary operation
// - KNOT
// Single register-register form: dst = OpNode(src).
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}
   2948 
// Instantiates a mask unop for all four widths with the matching feature
// predicate and prefix encoding (B: DQI, W: AVX512F, D/Q: BWI).
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}
   2961 
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit:
// without DQI, a v8i1 NOT is done by copying through VK16 and using KNOTW.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte masks always go through the 16-bit KNOTW.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
   2974 
   2975 // Mask binary operation
   2976 // - KAND, KANDN, KOR, KXNOR, KXOR
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
// Single register-register-register form: dst = OpNode(src1, src2).
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                           RegisterClass KRC, SDPatternOperator OpNode,
                           X86FoldableSchedWrite sched, Predicate prd,
                           bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}
   2988 
// Instantiates a mask binop for all four widths. prdW lets the W form
// require a stronger predicate than the default AVX512F (used by KADD/DQI).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
   3002 
// Helper PatFrags for the compound mask operations KANDN and KXNOR.
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
   3008 
// TODO - do we need a X86SchedWriteWidths::KMASK type?
// All mask binops are commutable except KANDN; KADD is a DQI instruction.
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
   3016 
   3017 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
   3018                             Instruction Inst> {
   3019   // With AVX512F, 8-bit mask is promoted to 16-bit mask,
   3020   // for the DQI set, this type is legal and KxxxB instruction is used
   3021   let Predicates = [NoDQI] in
   3022   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
   3023             (COPY_TO_REGCLASS
   3024               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
   3025                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
   3026 
   3027   // All types smaller than 8 bits require conversion anyway
   3028   def : Pat<(OpNode VK1:$src1, VK1:$src2),
   3029         (COPY_TO_REGCLASS (Inst
   3030                            (COPY_TO_REGCLASS VK1:$src1, VK16),
   3031                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
   3032   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
   3033         (COPY_TO_REGCLASS (Inst
   3034                            (COPY_TO_REGCLASS VK2:$src1, VK16),
   3035                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
   3036   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
   3037         (COPY_TO_REGCLASS (Inst
   3038                            (COPY_TO_REGCLASS VK4:$src1, VK16),
   3039                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
   3040 }
   3041 
// Instantiate the narrow-mask promotion patterns for each 16-bit mask binop.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
   3047 
   3048 // Mask unpacking
// Mask unpacking (KUNPCK): concatenates two narrow masks into one wider one.
// Note the concat_vectors pattern passes $src2 as the first instruction
// operand — KUNPCK places src2 in the low half and src1 in the high half.
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
               (ins KRC:$src1, KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                        (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                        (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
   3065 
// KUNPCKBW is baseline AVX512F; the wider WD/DQ forms require BWI.
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
   3069 
   3070 // Mask bit testing
// Mask bit testing (KORTEST/KTEST): no mask result, only EFLAGS is defined.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}
   3080 
// Instantiates a mask test op for all four widths (B: DQI, W: prdW,
// D/Q: BWI). prdW lets KTEST require DQI for its W form.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                                                                VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                                                                VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                                                                VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                                                                VEX, PD, VEX_W;
}
   3093 
// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST is baseline AVX512F; KTEST requires DQI even for the W form.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
   3097 
   3098 // Mask shift
// Mask shift (KSHIFTL/KSHIFTR): mask-register shift by an 8-bit immediate.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 !strconcat(OpcodeStr,
                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
                            [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
                 Sched<[sched]>;
}
   3108 
// Instantiates mask shifts for all widths. W/B share opcode opc1, D/Q use
// opc2; B requires DQI, D/Q require BWI.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                               sched>, VEX, TAPD;
  }
}
   3123 
// KSHIFTL (0x32/0x33) and KSHIFTR (0x30/0x31) mask-register shifts.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
   3126 
   3127 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
   3128 multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
   3129                                               X86VectorVTInfo Narrow,
   3130                                               X86VectorVTInfo Wide> {
   3131   def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
   3132                               (Narrow.VT Narrow.RC:$src2))),
   3133           (COPY_TO_REGCLASS
   3134            (!cast<Instruction>(InstStr#"Zrr")
   3135             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
   3136             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
   3137            Narrow.KRC)>;
   3138 
   3139   def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
   3140                              (Frag (Narrow.VT Narrow.RC:$src1),
   3141                                    (Narrow.VT Narrow.RC:$src2)))),
   3142           (COPY_TO_REGCLASS
   3143            (!cast<Instruction>(InstStr#"Zrrk")
   3144             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
   3145             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
   3146             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
   3147            Narrow.KRC)>;
   3148 }
   3149 
   3150 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Variant carrying an explicit condition code: the PatFrag binds the matched
// node as $cc and its OperandTransform maps the CondCode to the instruction's
// immediate operand.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
// Unmasked compare with condition code.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr##Zrri)
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Compare ANDed with a mask -> masked ("k") instruction form.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                                 (Narrow.VT Narrow.RC:$src2),
                                                 cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (Frag.OperandTransform $cc)), Narrow.KRC)>;
}
   3173 
   3174 // Same as above, but for fp types which don't use PatFrags.
   3175 multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
   3176                                                 X86VectorVTInfo Narrow,
   3177                                                 X86VectorVTInfo Wide> {
   3178 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
   3179                               (Narrow.VT Narrow.RC:$src2), imm:$cc)),
   3180           (COPY_TO_REGCLASS
   3181            (!cast<Instruction>(InstStr##Zrri)
   3182             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
   3183             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
   3184             imm:$cc), Narrow.KRC)>;
   3185 
   3186 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
   3187                            (OpNode (Narrow.VT Narrow.RC:$src1),
   3188                                    (Narrow.VT Narrow.RC:$src2), imm:$cc))),
   3189           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
   3190            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
   3191            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
   3192            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
   3193            imm:$cc), Narrow.KRC)>;
   3194 }
   3195 
// Instantiate the widening-compare lowerings for element types available in
// base AVX-512 (i32/i64/f32/f64) when the VLX subvector forms are missing.
let Predicates = [HasAVX512, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  // Fixed-condition EQ/GT compares, 256- and 128-bit i32 widened to v16i32.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;

  // Same for i64 elements, widened to v8i64.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
  }

  // Immediate-condition-code compares, signed (VPCMP*) and unsigned (VPCMPU*).
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;

  // FP compares with immediate condition code (VCMPPS/VCMPPD).
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
}
   3230 
// Same instantiations for the byte/word element types, which additionally
// require BWI for the 512-bit compare instructions.
let Predicates = [HasBWI, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  // Fixed-condition EQ/GT compares, i8 elements widened to v64i8.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;

  // i16 elements widened to v32i16.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
  }

  // Immediate-condition-code compares, signed and unsigned.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
}
   3260 
   3261 // Mask setting all 0s or 1s
// A pseudo that materializes an all-zeros or all-ones mask register.
// isReMaterializable/isAsCheapAsAMove let the register allocator recreate the
// constant instead of spilling it; SchedRW=[WriteZero] models it as free.
// NOTE(review): the expansion of the pseudo happens after RA, outside this
// file — presumably to KXOR/KXNOR of a register with itself; confirm there.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
        SchedRW = [WriteZero] in
      // #NAME# is the legacy paste syntax: the def takes the defm's name.
      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}
   3269 
   3270 multiclass avx512_mask_setop_w<PatFrag Val> {
   3271   defm W : avx512_mask_setop<VK16, v16i1, Val>;
   3272   defm D : avx512_mask_setop<VK32,  v32i1, Val>;
   3273   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
   3274 }
   3275 
// KSET0{W,D,Q} materialize all-zeros masks, KSET1{W,D,Q} all-ones masks.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
   3278 
   3279 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // Narrow mask constants (v1i1..v8i1) reuse the 16-bit KSET0W/KSET1W pseudo;
  // the low bits of the 16-bit result are retagged to the narrow k-class.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
   3290 
   3291 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
   3292 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
   3293                                              RegisterClass RC, ValueType VT> {
   3294   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
   3295             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
   3296 
   3297   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
   3298             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
   3299 }
// Instantiate the free subvector copies for every (narrow, wide) mask pair.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
   3326 
   3327 //===----------------------------------------------------------------------===//
   3328 // AVX-512 - Aligned and unaligned load and store
   3329 //
   3330 
// One AVX-512 vector load/move flavor for a single vector width:
//   rr / rrk / rrkz - register-to-register moves (k-masked forms double as
//                     select/blend via SelectOprr),
//   rm / rmk / rmkz - loads from memory,
// plus trailing patterns that map masked-load DAG nodes onto the k-masked
// memory forms.
//   _            - vector type descriptor (X86VectorVTInfo)
//   ld_frag      - unmasked load fragment (aligned or unaligned per caller)
//   mload        - masked-load PatFrag matched by the trailing patterns
//   Sched        - RR/RM scheduling classes for this width
//   EVEX2VEXOvrd - instruction base name used by the EVEX->VEX compression
//   NoRMPattern  - suppress the selection pattern on the plain rm form
//   SelectOprr   - select node for the register-form patterns; callers pass
//                  null_frag to disable them (e.g. unaligned int moves)
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  // Plain register move; no pattern (copies are normally coalesced away).
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masked register move: selects src where mask is set, zero elsewhere.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  // Unmasked load; rematerializable and foldable as a load operand.
  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms tie $src0 (the pass-through value) to $dst.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                          (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    // Merge-masked load: unselected lanes keep $src0.
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                     [(set _.RC:$dst, (_.VT
                         (vselect _.KRCWM:$mask,
                          (_.VT (bitconvert (ld_frag addr:$src1))),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  // Zero-masked load: unselected lanes are zeroed.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.MemOp:$src),
                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                  [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                    (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // Masked load with undef pass-through: the zeroing form is as cheap and
  // leaves no lane undefined.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with a zero pass-through maps directly onto the {z} form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with a register pass-through maps onto the merge form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
   3397 
// Aligned-load variants across the three vector lengths: the 512-bit form is
// gated only on prd; the 128/256-bit forms additionally require VLX. The ZMM
// form passes an empty EVEX2VEX override since there is no VEX equivalent.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned512,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  // The YMM override gets a "Y" suffix to select the VEX Y-form name.
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned256,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned128,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}
   3416 
// Unaligned-load variants across the three vector lengths. Same structure as
// avx512_alignedload_vl but uses the plain LdFrag / masked_load_unaligned and
// forwards SelectOprr so callers can disable the register-select patterns.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load_unaligned, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                         masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
                         NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                         masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
                         NoRMPattern, SelectOprr>, EVEX_V128;
  }
}
   3436 
// One AVX-512 vector store flavor for a single vector width:
//   rr_REV / rrk_REV / rrkz_REV - store-direction (MRMDestReg) register moves
//     used only for disassembly and as ".s" assembler aliases; FoldGenData
//     links each to the canonical load-direction encoding,
//   mr / mrk - memory stores (plain and merge-masked),
// plus a pattern mapping masked-store nodes onto mrk.
//   st_frag     - unmasked store fragment (aligned or unaligned per caller)
//   mstore      - masked-store PatFrag matched by the trailing pattern
//   NoMRPattern - suppress the selection pattern on the plain mr store
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  // isCodeGenOnly: these encodings exist so the disassembler can round-trip
  // the store-direction opcode; codegen always emits the load direction.
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
                         [], _.ExeDomain>, EVEX,
                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>,  EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  // Unmasked store to memory.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Masked store; selected via the mstore pattern below, never folded.
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
               NotMemoryFoldable;

  def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                        _.KRCWM:$mask, _.RC:$src)>;

  // ".s" mnemonic aliases force the store-direction encodings in assembly.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
   3491 
// Unaligned-store variants across the three vector lengths; mirrors
// avx512_load_vl (ZMM gated on prd alone, XMM/YMM also on VLX).
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store_unaligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store_unaligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
   3509 
// Aligned-store variants across the three vector lengths, using the
// alignedstore fragment and per-width masked_store_aligned* PatFrags.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned512, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned256, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
   3528 
// Concrete move instructions. Each defm combines the load and store
// multiclasses under one name; opcodes 0x28/0x29 are the FP moves and
// 0x6F/0x7F the integer moves, distinguished further by prefix bytes.

// Aligned FP moves.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned FP moves; null_frag disables the register-select patterns so the
// aligned forms are preferred for masked register moves.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
                               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Aligned integer moves. NoRMPattern/NoMRPattern = 1 on the 32-bit forms:
// the explicit store patterns later in this file route plain int load/store
// through the 64-bit-element flavor instead.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned integer moves; byte/word forms require BWI.
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
   3592 
   3593 // Special instructions to help with spilling when we don't have VLX. We need
   3594 // to load or store from a ZMM register instead. These are converted in
   3595 // expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
// Aligned and unaligned 128/256-bit reload pseudos; rewritten to real ZMM
// loads by expandPostRAPseudos when VLX is unavailable.
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}
   3607 
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
// Matching spill-store pseudos, likewise expanded post-RA to ZMM stores.
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}
   3618 
// "Select zero where mask set" with the operands in the wrong order for the
// {z} instruction forms: invert the mask with KNOT and use the zero-masked
// move. NOTE(review): the input captures VK8WM:$mask but the output names
// VK8:$mask — TableGen tolerates the k-class mismatch; confirm intentional.
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                              VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
// NOTE(review): output uses VK16WM while the v8i64 variant above uses the
// plain VK8 class — looks like an accepted asymmetry; verify.
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
   3638 
// Lower a masked select on a narrow vector, when VLX is absent, by widening
// both vector operands into ZMM (undef upper lanes), performing the 512-bit
// masked move, and extracting the low subvector of the result.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 // Merge form: unselected lanes take $src0, which is widened into the
 // tied pass-through operand of the rrk instruction.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 // Zeroing form: unselected lanes become zero, so no pass-through operand.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}
   3660 
// Patterns for handling narrow-mask selects of 128/256-bit vectors when VLX
// isn't available. Use a 512-bit masked operation and extract the low lanes.
let Predicates = [HasAVX512, NoVLX] in {
  // 32-bit elements: widen 128/256-bit selects to the 512-bit masked move.
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  // 64-bit elements.
  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
   3674 
let Predicates = [HasBWI, NoVLX] in {
  // Byte/word elements need the BWI masked-move instructions.
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
   3682 
let Predicates = [HasAVX512] in {
  // 512-bit store.
  // All integer element widths are routed through the 64-bit-element move:
  // unmasked stores are element-size-agnostic, and this keeps one canonical
  // opcode (the 32-bit flavors had their mr patterns suppressed above).
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
   3698 
let Predicates = [HasVLX] in {
  // 128-bit store.
  // As with the 512-bit case, every integer element width uses the
  // 64-bit-element move opcode for unmasked stores.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
   3728 
// Lower a masked (vselect) use of the low subvector of a wider vector into a
// masked register-to-register move of InstrStr's type:
//   From - the wide source vector type,
//   To   - the extracted (narrow) subvector type,
//   Cast - the type the result is viewed as (selects the mask size and the
//          element width of the move instruction used).
// The extract itself becomes a plain EXTRACT_SUBREG of the wide register.
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  // Merge-masked form: unselected lanes come from $src0 -> use the "rrk"
  // (masked, with passthru) variant of the move.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask,
                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;

  // Zero-masked form: unselected lanes are zero -> use the "rrkz" variant.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask,
                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
}
   3749 
   3750 
// Instantiations of masked_move_for_extract for every (wide source, low
// subvector, cast type) combination. The Cast argument picks whether the
// 32-bit-element (VMOVDQA32/VMOVAPS) or 64-bit-element (VMOVDQA64/VMOVAPD)
// masked move is used, which must match the mask's element granularity.
let Predicates = [HasVLX] in {
// A masked extract from the first 128-bits of a 256-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;

// A masked extract from the first 128-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;

// A masked extract from the first 256-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
}
   3797 
// Move Int Doubleword to Packed Double Int
//
// EVEX-encoded GPR <-> XMM moves (vmovd/vmovq, opcodes 0x6E/0x7E) plus the
// isCodeGenOnly GPR <-> scalar-FP-register (FR64X) bitcast forms.
let ExeDomain = SSEPackedInt in {
// GR32 -> low element of XMM, upper elements zeroed (scalar_to_vector).
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// Same, but loading the i32 from memory (disp8 scaled by 4 via CD8VT1).
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 -> low quadword of XMM (VEX_W selects the 64-bit form of 0x6E).
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                        [(set VR128X:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory form kept only so the disassembler can round-trip the encoding;
// no ISel pattern (VMOVQI2PQIZrm below is used for codegen loads).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Bitcast moves between GR64 and the scalar-double register class. These are
// codegen-only: they share encodings with the VR128X forms above/below.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// NOTE(review): EVEX_CD8<8, CD8VT8> yields the same disp8 scale (8 bytes) as
// the EVEX_CD8<64, CD8VT1> used by the sibling defs — confirm it is intended.
def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
// NOTE(review): this moves *to* a GPR but is scheduled as WriteVecMoveFromGpr;
// WriteVecMoveToGpr looks like the matching class — verify before changing.
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Store the FR64X value to memory as an i64.
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
                         EVEX, VEX_W, Sched<[WriteVecStore]>,
                         EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
   3841 
// Move Int Doubleword to Single Scalar
//
// Codegen-only bitcast moves GR32/i32-memory -> FR32X; they reuse the vmovd
// encoding of the VR128X forms above.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;

def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
   3855 
// Move doubleword from xmm register to r/m32
//
// Extract element 0 of a v4i32 into a GR32 (register form) or store it to
// memory (memory form, disp8 scaled by 4).
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
   3871 
// Move quadword from xmm1 register to r/m64
//
// Extract element 0 of a v2i64 into a GR64, plus the store forms. Two store
// encodings exist (0x7E with VEX_W and 0xD6); only 0xD6 carries the ISel
// pattern, the 0x7E memory form exists for disassembly only.
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))]>,
                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                      Requires<[HasAVX512]>;

// Disassembler-only memory form of the 0x7E encoding (needs 64-bit mode).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                      EVEX, VEX_W, Sched<[WriteVecStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// The 0xD6 store form used for codegen: store low quadword to memory.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Disassembler-only register-register form of the 0xD6 encoding.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt
   3902 
// Assembler alias: "vmovq.s" selects the alternate (0xD6, store-form)
// reg-reg encoding defined above. Never printed (priority 0).
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
   3905 
// Move Scalar Single to Double Int
//
// Codegen-only bitcast moves FR32X -> GR32/i32-memory; the inverse of the
// VMOVDI2SSZ forms above.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
   3920 
// Move Quadword Int to Packed Quadword Int
//
// Load an i64 into the low quadword of an XMM register (upper lanes zeroed
// by scalar_to_vector semantics).
// NOTE(review): EVEX_CD8<8, CD8VT8> gives the same disp8 scale (8 bytes) as
// EVEX_CD8<64, CD8VT1> used elsewhere for quadword memory ops — confirm the
// tuple choice is intentional.
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt
   3931 
// Allow "vmovd" but print "vmovq".
// Assembler-only aliases (priority 0): accept "vmovd" spelling for the
// 64-bit GPR<->XMM moves, which are canonically printed as "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
   3937 
   3938 //===----------------------------------------------------------------------===//
   3939 // AVX-512  MOVSS, MOVSD
   3940 //===----------------------------------------------------------------------===//
   3941 
// Defines the EVEX scalar move family (vmovss/vmovsd): unmasked,
// merge-masked (rrk) and zero-masked (rrkz) register forms, scalar
// load/store forms, and masked load/store forms.
//   asm    - mnemonic string
//   OpNode - the scalar-move DAG node (X86Movss/X86Movsd)
//   _      - vector type info for the scalar element's 128-bit container
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  // Unmasked reg-reg form. The ISel pattern is restricted to OptForSize;
  // presumably other lowerings are preferred when not optimizing for size —
  // TODO confirm against the lowering code.
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Zero-masked reg-reg form: element is zeroed when the mask bit is clear.
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masked reg-reg form: $src0 supplies the passthru value and is tied
  // to $dst.
  let Constraints = "$src0 = $dst"  in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Scalar load into the FP register class; foldable and rematerializable.
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // Masked load forms carry no ISel pattern here; patterns are supplied by
  // the avx512_load_scalar_lowering* multiclasses below.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  // Unmasked scalar store.
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked store; pattern supplied by avx512_store_scalar_lowering* below.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}
   3996 
// Instantiate the scalar move family: vmovss (XS prefix, 32-bit element)
// and vmovsd (XD prefix, VEX_W, 64-bit element).
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
   4002 
   4003 
// Lower a scalar select (X86selects) feeding a scalar move node into the
// masked reg-reg move instructions defined by avx512_move_scalar:
//   InstrStr - instruction base name ("VMOVSSZ"/"VMOVSDZ")
//   OpNode   - the scalar-move DAG node
//   ZeroFP   - PatLeaf matching the FP zero immediate (selects the kz form)
//   _        - type info for the 128-bit container
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// select(mask, src1, src2): merge-masked move — src2 becomes the passthru
// operand ($src0 of the rrk form), src1 the moved element.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

// select(mask, src1, 0): zero-masked move (rrkz form).
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
   4028 
// Lower a 512-bit masked_store of a 128-bit vector widened with
// insert_subvector (only lane 0 live) into the masked scalar store (mrk).
// Mask is the expected dag form of the 1-bit mask held in MaskRC.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
   4041 
// Same as avx512_store_scalar_lowering, but the mask lives in a sub-32-bit
// GPR: it is first widened to i32 with INSERT_SUBREG into an IMPLICIT_DEF
// before being copied to the VK1WM mask class.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
   4056 
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets.
//
// Two patterns: the widened-to-512-bit form (Mask512, AVX512F-only targets)
// and the native 128-bit form (Mask128, AVX512VL targets). Both select the
// masked scalar store (mrk), widening the GPR mask to i32 via INSERT_SUBREG.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store addr:$dst, Mask512,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

// AVX512VL pattern.
def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
   4081 
// Lower extract_subvector of a 512-bit masked_load into the masked scalar
// load. Two passthru shapes are handled: all-zeros (-> rmkz) and a widened
// X86vzmovl of an existing 128-bit register (-> rmk with that register as
// the merge source).
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

// Zero passthru -> zero-masked scalar load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

// Register passthru -> merge-masked scalar load.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
   4105 
// Same as avx512_load_scalar_lowering, but the mask arrives in a sub-32-bit
// GPR and is widened to i32 (INSERT_SUBREG into IMPLICIT_DEF) before the
// copy to VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

// Zero passthru -> zero-masked scalar load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Register passthru -> merge-masked scalar load.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
   4131 
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
//
// Four patterns: {widened-to-512-bit, native-128-bit} x {zero passthru,
// register passthru}. All widen the GPR mask to i32 before copying to VK1WM.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask512, dag Mask128,
                                              RegisterClass MaskRC,
                                              SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}
   4173 
// Instantiate the scalar select/store/load lowerings for vmovss (f32) and
// vmovsd (f64). The Mask dags spell out the exact DAG shapes produced for
// an i1 mask arriving in a GR32/GR16/GR8 register, including the
// insert/extract_subvector forms clang's widened masked ops produce.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores: mask in GR32 (truncated), GR16 and GR8.
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

// Masked scalar stores where the v4i1/v2i1 mask was widened through
// insert/extract_subvector (512-bit form) or used directly (VL form).
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Masked scalar loads: mask in GR32 (truncated), GR16 and GR8.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

// Masked scalar loads with the widened / native narrow mask shapes,
// mirroring the store instantiations above.
defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
   4240 
// Scalar select (X86selects) with a 1-bit write-mask lowers to a masked
// VMOVSS/VMOVSD register move. The FR32X/FR64X operands are copied into
// VR128X because the masked move instructions operate on full XMM registers;
// the IMPLICIT_DEF fills the unused high lanes.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

// When the false operand is +0.0, use the zero-masking form instead of
// materializing a zero register.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

// f64 merge-masking form.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

// f64 zero-masking form for a +0.0 false operand.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
   4260 
// Reverse-operand (MRMDestReg, opcode 0x11) encodings of VMOVSS/VMOVSD.
// These carry no patterns (codegen-only, ForceDisassemble makes them
// visible to the disassembler); FoldGenData ties each to its forward
// (0x10) twin so the MC layer can treat the encodings as equivalent.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  // Unmasked reverse VMOVSS.
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  // Merge-masked reverse VMOVSS; $src0 is the pass-through, tied to $dst.
  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  // Zero-masked reverse VMOVSS.
  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  // Unmasked reverse VMOVSD (VEX_W selects the 64-bit element form).
  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  // Merge-masked reverse VMOVSD.
  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  // Zero-masked reverse VMOVSD.
  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                                          VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}
   4313 
// Assembler aliases: the ".s" mnemonic suffix selects the reverse-operand
// (_REV) encodings defined above. The trailing 0 disables the alias for
// printing, so the disassembler still emits the plain mnemonic.
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
   4334 
// When optimizing for size, lower "move low element and zero the rest"
// (X86vzmovl) with VMOVSS/VMOVSD against a zeroed register. For 256/512-bit
// types, operate on the low XMM subregister and use SUBREG_TO_REG to assert
// the implicit zeroing of the upper lanes.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // Move low f64/i64 and clear high bits (256-bit).
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // 512-bit f32/i32 variants.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;

  // 512-bit f64/i64 variants.
  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;

}
   4380 
   4381 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
   4382 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  // Blend immediate 1 keeps element 0 from the source, zeros from V_SET0.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  // VPBLENDW selects 16-bit lanes: mask 3 covers the low i32 element.
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
                          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  // Mask 0xf covers the four 16-bit lanes of the low i64 element.
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
                          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
                          (i8 0xf))), sub_xmm)>;
}
   4406 
// Patterns mapping zero-extending scalar loads (X86vzmovl-of-load and
// X86vzload) onto VMOVSS/VMOVSD/MOVD/MOVQ memory forms for 128/256/512-bit
// result types.
let Predicates = [HasAVX512] in {

  // MOVSSrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // MOVSDrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // 256-bit i64 zero-extending load via MOVQ.
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
}
   4470 
// EVEX vmovq xmm, xmm: moves the low i64 element and zeros the upper
// element (X86vzmovl on v2i64).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))]>,
                                EVEX, VEX_W;
}
   4479 
// GPR-to-vector and memory zero-extension patterns using the EVEX
// MOVD/MOVQ forms; wider result types assert upper-lane zeroing with
// SUBREG_TO_REG.
let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // 256/512-bit scalar_to_vector of a GR64, low element only.
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
            (VMOVQI2PQIZrm addr:$src)>;
  // Register-to-register low-f64 zeroing move reuses the integer vmovq.
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
}
   4529 
   4530 //===----------------------------------------------------------------------===//
   4531 // AVX-512 - Non-temporals
   4532 //===----------------------------------------------------------------------===//
   4533 
// Non-temporal aligned loads (vmovntdqa). No selection patterns here; the
// Pat defs further below map alignednontemporalload onto these.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

// 256/128-bit forms require VLX.
let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                       (ins i256mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                      (ins i128mem:$src),
                      "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
   4552 
// One non-temporal store (register-to-memory) instruction for a single
// vector type. st_frag defaults to alignednontemporalstore; the high
// AddedComplexity makes these win over ordinary store patterns.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
   4562 
// Instantiates avx512_movnt for the 512-bit type (HasAVX512) and, under
// VLX, the 256/128-bit types of the same element width.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}
   4574 
// Non-temporal stores: integer (vmovntdq) and FP (vmovntpd/vmovntps).
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
   4581 
// Route other 512-bit element types through VMOVNTDQ/VMOVNTDQA; the
// instruction semantics are element-type agnostic.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
   4597 
// Same element-type routing for the VLX 256/128-bit forms.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
   4627 
   4628 //===----------------------------------------------------------------------===//
   4629 // AVX-512 - Integer arithmetic
   4630 //
// Maskable integer binop with register-register (rr) and register-memory
// (rm) forms for a single vector type. IsCommutable enables operand
// swapping/memory folding on the rr form.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (bitconvert (_.LdFrag addr:$src2))))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
   4649 
// Extends avx512_binop_rm with an EVEX.b broadcast-from-memory (rmb) form
// where the scalar memory operand is splatted via X86VBroadcast.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src2))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
   4664 
// Instantiates avx512_binop_rm at all three vector lengths: 512-bit under
// prd, 256/128-bit additionally gated on VLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}
   4680 
// Same length expansion as avx512_binop_rm_vl but for the broadcast-capable
// avx512_binop_rmb (adds the rmb form at each width).
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
   4696 
// i64-element binop: broadcast-capable, VEX.W set, CD8 scale 64.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}
   4704 
// i32-element binop: broadcast-capable, CD8 scale 32.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
   4711 
// i16-element binop: no broadcast form (uses avx512_binop_rm_vl), VEX.W
// ignored.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}
   4719 
// i8-element binop: no broadcast form, VEX.W ignored.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}
   4727 
// Paired i32 ("d") and i64 ("q") variants with distinct opcodes; mnemonics
// get the element-size suffix appended.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                   IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                   IsCommutable>;
}
   4737 
// Paired i8 ("b") and i16 ("w") variants with distinct opcodes.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                   IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                   IsCommutable>;
}
   4747 
// All four element sizes: d/q under HasAVX512, b/w under HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
   4758 
// Binop whose source and destination vector types differ (_Src vs _Dst),
// with a broadcast form taking its element type from a third info (_Brdct)
// — used for widening ops such as multiplies.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2))))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, ReadAfterLd]>;

  // Broadcast form: scalar load splatted to _Brdct.VT, then bitcast to the
  // source type expected by OpNode.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Brdct.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (X86VBroadcast
                                          (_Brdct.ScalarLdFrag addr:$src2))))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
   4791 
// Integer add/sub (plain, signed-saturating, unsigned-saturating), multiply
// (low/high/rounded-high/widening) and average instruction definitions.
// The final bit argument marks commutable ops (add/mul yes, sub no).
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
// VPMULLQ is AVX-512DQ only and has no VEX equivalent, hence
// NotEVEX2VEXConvertible.
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
   4823 
// Instantiate avx512_binop_rm2 at 512/256/128-bit widths with distinct
// source/destination VT info per width. The broadcast element is always i64
// (v8i64/v4i64/v2i64), and all widths use CD8 scaling for 64-bit elements.
// 256/128-bit forms additionally require VLX.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
   4845 
// VPMULTISHIFTQB (AVX-512VBMI): byte source/dest vectors with qword broadcast
// granularity (supplied by avx512_binop_all's fixed v*i64 broadcast info).
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                avx512vl_i8_info, avx512vl_i8_info,
                                X86multishift, HasVBMI, 0>, T8PD;
   4849 
// Broadcast-memory form for pack-style ops: splat a scalar of the *source*
// element type (unlike avx512_binop_rm2, which uses a separate _Brdct info).
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Src.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (X86VBroadcast
                                          (_Src.ScalarLdFrag addr:$src2))))))>,
                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
   4864 
// Register-register and register-memory forms for pack-style ops with
// differing source/destination VTs. CD8 memory-operand scaling uses the
// source element size.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2))))>,
                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[sched.Folded, ReadAfterLd]>;
}
   4885 
// i32 -> i16 pack instructions (VPACKSSDW/VPACKUSDW) at all vector widths.
// These get both the regular rm forms and the dword-broadcast rmb form.
// All forms require BWI; the 256/128-bit forms also require VLX.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 pack instructions (VPACKSSWB/VPACKUSWB) at all vector widths.
// No broadcast (rmb) form exists for byte-pack; only rr/rm are defined.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                            SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}
   4920 
// VPMADD-style horizontal multiply-add: reuses avx512_packs_rm since the
// source and destination element types differ. Uses the IMul scheduling class
// rather than the shuffle class used by the pack instructions.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}
   4937 
// Pack and multiply-add instruction definitions. Note VPACKUSDW uses the
// SSE4.1-style AVX5128IBase (0F 38 map) while the others use AVX512BIBase.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

// VPMADDWD is commutable (signed i16 pairs -> i32); VPMADDUBSW is not, since
// its two operands have different signedness.
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
   4947 
// Integer min/max definitions: signed/unsigned x b/w/d/q. b/w forms need
// BWI; d/q forms need only AVX512. The q forms have no VEX equivalent and
// are marked NotEVEX2VEXConvertible. All min/max ops are commutable.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
   4987 
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow operands are inserted into an undef ZMM register, the 512-bit
// VPMULLQ is executed, and the low subregister is extracted as the result.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
   5004 
   5005 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
   5006 let Predicates = [HasDQI, NoVLX] in {
   5007   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
   5008             (EXTRACT_SUBREG
   5009                 (VPMULLQZrr
   5010                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
   5011                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
   5012              sub_ymm)>;
   5013 
   5014   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
   5015             (EXTRACT_SUBREG
   5016                 (VPMULLQZrr
   5017                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
   5018                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
   5019              sub_xmm)>;
   5020 }
   5021 
// Lower 128/256-bit i64 min/max via the 512-bit instruction when VLX is not
// available: widen both operands into undef ZMM, run Instr, extract the low
// subregister. Instr is the 512-bit rr form; OpNode is the matched SDNode.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
   5037 
// Widening lowerings for the four i64 min/max ops when VLX is unavailable.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
   5044 
   5045 //===----------------------------------------------------------------------===//
   5046 // AVX-512  Logical Instructions
   5047 //===----------------------------------------------------------------------===//
   5048 
   5049 // OpNodeMsk is the OpNode to use when element size is important. OpNode will
   5050 // be set to null_frag for 32-bit elements.
// OpNodeMsk is the OpNode to use when element size is important. OpNode will
// be set to null_frag for 32-bit elements.
// rr/rm forms of a bitwise logic op. The unmasked pattern uses OpNode on
// i64-typed vectors; the masked pattern uses OpNodeMsk and bitconverts back
// to the element-typed VT so the writemask applies per element.
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, X86FoldableSchedWrite sched,
                           X86VectorVTInfo _, bit IsCommutable = 0> {
  let hasSideEffects = 0 in
  defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                     (bitconvert (_.VT _.RC:$src2)))),
                    (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                          _.RC:$src2)))),
                    IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.LdFrag addr:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert (_.LdFrag addr:$src2))))))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
   5077 
   5078 // OpNodeMsk is the OpNode to use where element size is important. So use
   5079 // for all of the broadcast patterns.
// OpNodeMsk is the OpNode to use where element size is important. So use
// for all of the broadcast patterns.
// Extends avx512_logic_rm (inherited rr/rm) with the element-broadcast rmb
// form; both the unmasked and masked broadcast patterns use OpNodeMsk since
// the broadcast element size must match the instruction's element size.
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
                            bit IsCommutable = 0> :
           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
                           IsCommutable> {
  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.i64VT (OpNodeMsk _.RC:$src1,
                                   (bitconvert
                                    (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2)))))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert
                                      (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2))))))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
   5101 
// Instantiate avx512_logic_rmb at 512/256/128-bit widths. The narrow widths
// additionally require VLX.
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
                                 VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
                                 VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
   5118 
// dword and qword variants of a logic op. The q form matches OpNode
// directly for the unmasked pattern; the d form passes null_frag there so
// all unmasked logic selects to the q instruction, while OpNode still
// handles the element-sensitive (masked/broadcast) d patterns.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
                               avx512vl_i64_info, IsCommutable>,
                               VEX_W, EVEX_CD8<64, CD8VF>;
  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
                               avx512vl_i32_info, IsCommutable>,
                               EVEX_CD8<32, CD8VF>;
}
   5129 
// Bitwise logic instruction definitions. AND/OR/XOR are commutable;
// ANDN (x86 and-not) is not.
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic>;
   5138 
   5139 //===----------------------------------------------------------------------===//
   5140 // AVX-512  FP arithmetic
   5141 //===----------------------------------------------------------------------===//
   5142 
// Scalar FP binary op: maskable intrinsic forms (rr_Int/rm_Int) built from
// VecNode with the current rounding mode, plus plain codegen-only FRC forms
// (rr/rm) built from OpNode for non-intrinsic selection.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                          (i32 FROUND_CURRENT)))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2,
                                        (i32 FROUND_CURRENT)))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  // Scalar-register (FRC) forms used for ordinary (non-intrinsic) codegen;
  // not emitted into the disassembler tables (isCodeGenOnly).
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
  }
}
   5178 
// Static-rounding (embedded RC) variant of a scalar FP op: takes an explicit
// AVX512RC rounding-control operand, encoded with EVEX.B + RC bits.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 imm:$rc)), IsCommutable>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Scalar FP op with a suppress-all-exceptions ({sae}) form instead of a
// rounding-control form (used by min/max, which don't round). Provides
// rr_Int/rm_Int from VecNode, codegen-only FRC forms from OpNode, and the
// EVEX.B-encoded rrb_Int {sae} form from SaeNode.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // Codegen-only scalar-register forms for non-intrinsic selection.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }

  // {sae} form: suppress-all-exceptions, encoded via EVEX.B in register form.
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC))>, EVEX_B,
                            Sched<[sched]>;
  }
}
   5231 
// Scalar-single (SSZ) and scalar-double (SDZ) instances of an FP binary op,
// each combining the basic forms with the embedded-rounding form.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, X86SchedWriteSizes sched,
                                bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                              sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                              sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
   5246 
// Scalar-single/scalar-double instances of an FP op that uses a {sae} form
// rather than embedded rounding (min/max).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic definitions. add/mul are commutable; sub/div are not.
// min/max use the SAE variant and are not commutable (IEEE NaN ordering).
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SchedWriteFCmpSizes, 0>;
   5269 
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Codegen-only FRC rr/rm forms with isCommutable forced on.
// (Name misspells "commutable"; kept for compatibility with existing defms.)
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// codegen-only commutable scalar min/max instantiations (X86fminc/X86fmaxc);
// they reuse the vminss/vminsd/vmaxss/vmaxsd mnemonics and opcodes.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;
   5306 
// Packed FP binary op for one vector width: masked rr, rm (full-vector
// load), and rmb (element broadcast, EVEX_B) forms.  IsKZCommutable lets
// the zero-masked form stay commutable even when the merge-masked form is
// not.  hasSideEffects = 0 because OpNode may be null_frag (see the
// VAND/VOR/VXOR instantiations below, which have no patterns here).
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKZCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
                  IsKZCommutable>,
                  EVEX_4V, Sched<[sched]>;
  // mayLoad must be stated explicitly since hasSideEffects = 0 suppresses
  // the usual inference from the (possibly null) pattern.
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, ReadAfterLd]>;
    }
  }
}
   5335 
// Packed FP binary op with an explicit static rounding-control operand
// ($rc, e.g. {rn-sae}); register-register only, EVEX_B + EVEX_RC encoded.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
   5346 
// Packed FP binary op with the {sae} form only: no rounding-control
// operand, FROUND_NO_EXC hard-wired into the pattern.  Used for min/max,
// which cannot take a rounding mode.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}
   5357 
// Packed FP binary op across all vector widths: ZMM under `prd`, and
// XMM/YMM additionally gated on AVX512VL.  IsPD128Commutable exists so the
// 128-bit PD variant can be commutable independently of the rest (it is
// forwarded as avx512_fp_packed's IsKZCommutable for that one width).
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

    // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}
   5388 
// Rounding-control forms exist only at 512-bit width (embedded rounding
// implies the full ZMM register), hence no VLX variants here.
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
   5398 
// {sae} forms, likewise 512-bit only (see avx512_fp_binop_p_round above).
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
   5408 
// Packed FP arithmetic at all widths, plus the ZMM rounding/{sae} forms.
// ADD/MUL/SUB/DIV pair avx512_fp_binop_p with avx512_fp_binop_p_round;
// MIN/MAX pair it with avx512_fp_binop_p_sae.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
// Commutable min/max variants (X86fminc/X86fmaxc), codegen-only.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
// FP logic ops require DQI; patterns use null_frag here -- selection is
// handled via the lowering patterns further down.
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
   5441 
   5442 // Patterns catch floating point selects with bitcasted integer logic ops.
// Patterns catch floating point selects with bitcasted integer logic ops.
// InstrStr names an integer-logic instruction family (e.g. VPANDDZ); these
// patterns map masked FP vselects over the bitcast integer op, and the
// broadcast forms, onto the k/kz/mb/mbk/mbkz variants of that instruction.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;
  // Zero-masked variant (all-zeros fallthrough selects the kz form).
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;
  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1,
                                         (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
  // Register-broadcast logical operations.
  def : Pat<(_.i64VT (OpNode _.RC:$src1,
                      (bitconvert (_.VT (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)))))),
            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
  // Masked broadcast, merge-masking form.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  // Masked broadcast, zero-masking form.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
}
   5494 
// Instantiate avx512_fp_logical_lowering for every width/element-size
// combination: the D (dword) forms carry the f32 infos, the Q (qword)
// forms the f64 infos; 128/256-bit need VLX, 512-bit only AVX512.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
}
   5503 
// Hook the FP-select lowering patterns onto the integer logic instructions.
defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
   5508 
let Predicates = [HasVLX,HasDQI] in {
  // Use packed logical operations for scalar ops: scalar FR32X/FR64X
  // and/or/xor/andn are selected as the 128-bit packed DQI instructions,
  // copying the scalars through VR128X and back.
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                   (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;

  // Same for f32.
  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                   (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
}
   5553 
// Packed VSCALEF-style op (OpNode takes an extra i32 rounding operand,
// fixed here to FROUND_CURRENT): masked rr, rm, and broadcast rmb forms.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
                  EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))),
                                              (i32 FROUND_CURRENT))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   5577 
// Scalar VSCALEF-style op (SS/SD): masked rr and intrinsic-memory rm forms,
// again with FROUND_CURRENT baked into the pattern.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
                          (i32 FROUND_CURRENT))>,
                  Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   5594 
// Full VSCALEF family: packed PS/PD at ZMM (with rounding-control forms),
// scalar SS/SD (with scalar rounding forms, separate opcode opcScaler),
// and VLX-gated XMM/YMM packed variants (no rounding at sub-512 widths).
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                SDNode OpNode, SDNode OpNodeScal,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
                              EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
                           EVEX_4V,EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
                           EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// VSCALEF{PS,PD,SS,SD}: packed opcode 0x2C, scalar opcode 0x2D.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
   5625 
   5626 //===----------------------------------------------------------------------===//
   5627 // AVX-512  VPTESTM instructions
   5628 //===----------------------------------------------------------------------===//
   5629 
// VPTESTM/VPTESTNM core: compares (src1 AND src2) against zero into a mask
// register.  OpNode is a setcc PatFrag (SETNE for testm, SETEQ for testnm).
// `Name` is the instantiating family's NAME, used by the compare-with-zero
// patterns below to refer back to the rr/rrk instructions.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  let ExeDomain = _.ExeDomain in {
  let isCommutable = 1 in
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)>,
                   EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (OpNode (bitconvert
                            (_.i64VT (and _.RC:$src1,
                                          (bitconvert (_.LdFrag addr:$src2))))),
                           _.ImmAllZerosV)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }

  // Patterns for compare with 0 that just use the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
                                      _.RC:$src, _.RC:$src))>;

  // Masked form of the same: AND of the mask folds into the k-masked rr.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
                                      _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
   5661 
// Broadcast-memory (rmb) form of VPTESTM/VPTESTNM; only meaningful for the
// D/Q element sizes, so it is mixed in separately from avx512_vptest.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (OpNode (and _.RC:$src1,
                                       (X86VBroadcast
                                        (_.ScalarLdFrag addr:$src2))),
                            _.ImmAllZerosV)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
   5676 
   5677 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Without VLX the 128/256-bit inputs are widened: INSERT_SUBREG into an
// IMPLICIT_DEF of ExtendInfo's (512-bit) type, run the Z-width instruction,
// then COPY_TO_REGCLASS the wide mask result back down to _.KRC.  The upper
// lanes of the widened sources are undef, but the narrow mask copy only
// reads the low bits.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
                                  X86VectorVTInfo _, string Name> {
  // Unmasked two-source test.
  def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name # "Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src1, _.SubRegIdx),
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src2, _.SubRegIdx)),
                   _.KRC))>;

  // Masked two-source test: the narrow mask is widened for the k-input too.
  def : Pat<(_.KVT (and _.KRC:$mask,
                        (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                                _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(Name # "Zrrk")
              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src1, _.SubRegIdx),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src2, _.SubRegIdx)),
             _.KRC)>;

  // Compare-with-zero: feed the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name # "Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx),
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;

  // Masked compare-with-zero.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(Name # "Zrrk")
              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src, _.SubRegIdx),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src, _.SubRegIdx)),
             _.KRC)>;
}
   5721 
// D/Q-element VPTEST{N}M for one element size: ZMM under AVX512, YMM/XMM
// under VLX (with the broadcast rmb form at every width), and the widening
// fallback patterns under NoVLX.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
  let Predicates = [HasAVX512, NoVLX] in {
  defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>;
  defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>;
  }
}
   5739 
// Instantiate the D (i32) and Q (i64, VEX_W) element sizes.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                 avx512vl_i64_info>, VEX_W;
}
   5747 
// W (i16) and B (i8) element VPTEST{N}M: BWI required for ZMM, BWI+VLX for
// YMM/XMM.  No broadcast form exists for byte/word elements, so only
// avx512_vptest is used.  NoVLX fallbacks widen to the 512-bit version.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            PatFrag OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
                            v64i8_info, NAME#"B">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }

  let Predicates = [HasAVX512, NoVLX] in {
  defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
  defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
  defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
  defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
  }
}
   5775 
   5776 // These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
   5777 // as commutable here because we already canonicalized all zeros vectors to the
   5778 // RHS during lowering.
// setcc wrappers: NE selects vptestm, EQ selects vptestnm.
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETEQ)>;
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETNE)>;
   5783 
// All four element sizes; byte/word and dword/qword forms use different
// opcodes (opc_wb vs. opc_dq).
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   PatFrag OpNode, X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
   5788 
// VPTESTM (T8PD prefix) selects on SETNE; VPTESTNM (T8XS) on SETEQ.
defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
                                         SchedWriteVecLogic>, T8PD;
defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
                                         SchedWriteVecLogic>, T8XS;
   5793 
   5794 //===----------------------------------------------------------------------===//
   5795 // AVX-512  Shift instructions
   5796 //===----------------------------------------------------------------------===//
   5797 
// Immediate-count shift, register (ri) and full-vector memory (mi) forms.
// ImmFormR/ImmFormM carry the ModRM /reg opcode extension that selects the
// operation (e.g. MRM2r for psrl, MRM4r for psra, MRM6r for psll).
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
                   Sched<[sched]>;
  // Memory form: the shifted operand comes from memory, count stays immediate.
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                          (i8 imm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}
   5815 
// Immediate-count shift with a broadcast memory source (EVEX.b): a single
// scalar element is loaded and broadcast, then shifted by the immediate.
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
     (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
     EVEX_B, Sched<[sched.Folded]>;
}
   5826 
// Variable-count shift where the count comes from an XMM register or a
// 128-bit memory operand (the classic pslld/psrld-style count operand).
// bc_frag bitconverts the v2i64 memory load to SrcVT for the pattern.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            PatFrag bc_frag, X86VectorVTInfo _> {
   // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   5845 
// Instantiate avx512_shift_rrm for 512/256/128-bit vectors. The memory
// operand is always 128 bits, so the EVEX compressed-displacement scale is
// a quarter of a ZMM (CD8VQ), half of a YMM (CD8VH) and a full XMM (CD8VF).
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                               bc_frag, VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               bc_frag, VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               bc_frag, VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
   5863 
   5864 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
   5865                               string OpcodeStr, SDNode OpNode,
   5866                               X86SchedWriteWidths sched,
   5867                               bit NotEVEX2VEXConvertibleQ = 0> {
   5868   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
   5869                               bc_v4i32, avx512vl_i32_info, HasAVX512>;
   5870   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
   5871   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
   5872                               bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
   5873   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
   5874                               bc_v2i64, avx512vl_i16_info, HasBWI>;
   5875 }
   5876 
// Instantiate the immediate-count shift (register, memory and broadcast
// forms) for 512/256/128-bit vectors; 128/256 additionally require VLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}
   5897 
// Word-element immediate shifts (BWI). No broadcast form exists for word
// elements, so only avx512_shift_rmi is instantiated. VEX_WIG: the W bit
// is ignored by these encodings.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}
   5911 
// Dword/qword immediate shifts across all vector widths. The Q form gets
// VEX_W and may be marked non-convertible to VEX (e.g. vpsraq, which has no
// VEX encoding).
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
   5923 
// Immediate-count shifts and rotates. All share opcode 0x72/0x73/0x71 group
// encodings; the ModRM /reg field selects the operation (MRM2 = logical
// right, MRM4 = arithmetic right, MRM6 = left, MRM0 = rotate right,
// MRM1 = rotate left).
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// The trailing '1' marks the Q form as not EVEX->VEX convertible (vpsraq
// has no VEX encoding).
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
   5943 
// Shifts whose count is the low element of an XMM register / 128-bit memory
// operand. VPSRA's '1' flag: its Q form (vpsraq) is not EVEX->VEX convertible.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;
   5950 
// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Without VLX the 128/256-bit vpsraq encodings are unavailable, so widen the
// source into a ZMM register (upper bits undefined via IMPLICIT_DEF), run the
// 512-bit shift, and extract the original-width subvector.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
}
   5977 
   5978 //===-------------------------------------------------------------------===//
   5979 // Variable Bit Shifts
   5980 //===-------------------------------------------------------------------===//
   5981 
// Per-element variable shift: each lane is shifted by the count in the
// corresponding lane of src2. Register-register and register-memory forms.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   5999 
// Broadcast-memory (EVEX.b) form of the per-element variable shift: the
// per-lane counts come from a single broadcast scalar.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2)))))>,
                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
   6012 
// Instantiate the variable shift (plus its broadcast form) for 512/256/128
// bits; the narrow widths additionally require VLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}
   6026 
// Dword and qword element variants of a variable shift; both share one
// opcode, distinguished by the VEX.W bit on the Q form.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                 avx512vl_i64_info>, VEX_W;
}
   6034 
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens both operands into ZMM registers (upper lanes undefined), performs
// the 512-bit operation named by OpcodeStr#"Zrr", and extracts the original
// subvector.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Word-element variable shifts (vpsllvw/vpsravw/vpsrlvw); these require BWI
// and have no broadcast form (word broadcasts are not encodable).
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}
   6069 
// Per-element variable shifts and rotates. d/q forms share one opcode per
// operation; the w forms are separate BWI-only opcodes.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

// Widen-to-512-bit lowerings for the variants that have no narrow encoding
// under the given predicates (vpsravq without VLX; the w forms without VLX).
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
   6086 
// Special handling for the X86vsrav node: select the VPSRAV instruction for
// the plain, masked (merge) and zero-masked forms, with register and
// memory-load variants of the count operand.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
               _.RC:$src2)>;
    def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
               _.RC:$src1, addr:$src2)>;
    // Merge-masking: lanes with a clear mask bit keep $src0.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    // Zero-masking: lanes with a clear mask bit are zeroed.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
               _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
   6117 
// Extends avx512_var_shift_int_lowering with broadcast-memory (rmb) pattern
// variants for element types that support embedded broadcast (d/q, not w).
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> :
           avx512_var_shift_int_lowering<InstrStr, _, p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
               _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
   6140 
// Instantiate the X86vsrav selection patterns: word forms (no broadcast)
// under BWI, dword/qword forms (with broadcast) under AVX512F/VLX.
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
   6150 
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Each pattern widens the operands into the 512-bit register class via
// INSERT_SUBREG over IMPLICIT_DEF, rotates at 512 bits, then extracts the
// original-width subvector.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate-count rotate-left variants.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
   6201 
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror image of the VPROL block above: widen to 512 bits, rotate right,
// then extract the original-width subvector.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate-count rotate-right variants.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
   6252 
   6253 //===-------------------------------------------------------------------===//
   6254 // 1-src variable permutation VPERMW/D/Q
   6255 //===-------------------------------------------------------------------===//
   6256 
// Variable cross-lane permute for d/q element types. Reuses the
// avx512_var_shift multiclasses since the operand shapes are identical.
// Only 512- and 256-bit forms: a 128-bit cross-lane permute is meaningless
// for these element widths.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}
   6267 
// Immediate-control cross-lane permute (vpermq/vpermpd $imm forms). Reuses
// the shift_rmi multiclasses; 512- and 256-bit only.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                 string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}
   6282 
// Variable byte/word permute (vpermb/vpermw) at all three widths; the
// feature predicate is a parameter since vpermw needs BWI and vpermb VBMI.
// No broadcast form: byte/word embedded broadcast is not encodable.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                              Predicate prd, SDNode OpNode,
                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
              EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
              EVEX_V128 ;
  }
}
   6296 
// One-source variable permutes. vpermw/vpermb share opcode 0x8D (VEX.W
// distinguishes them); integer and FP d/q variants likewise pair via VEX.W.
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

// Immediate-control forms of the qword permutes.
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
   6317 
   6318 //===----------------------------------------------------------------------===//
   6319 // AVX-512 - VPERMIL
   6320 //===----------------------------------------------------------------------===//
   6321 
// In-lane FP permute with a variable (vector) control operand: register,
// full-memory and broadcast-memory control forms. Ctrl describes the integer
// control vector type matching the FP data type _.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast form: a single scalar control element applied to every lane.
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                       (Ctrl.ScalarLdFrag addr:$src2)))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
}
   6350 
   6351 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
   6352                                     X86SchedWriteWidths sched,
   6353                                     AVX512VLVectorVTInfo _,
   6354                                     AVX512VLVectorVTInfo Ctrl> {
   6355   let Predicates = [HasAVX512] in {
   6356     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
   6357                                   _.info512, Ctrl.info512>, EVEX_V512;
   6358   }
   6359   let Predicates = [HasAVX512, HasVLX] in {
   6360     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
   6361                                   _.info128, Ctrl.info128>, EVEX_V128;
   6362     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
   6363                                   _.info256, Ctrl.info256>, EVEX_V256;
   6364   }
   6365 }
   6366 
   6367 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
   6368                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
   6369   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
   6370                                       _, Ctrl>;
   6371   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
   6372                                     X86VPermilpi, SchedWriteFShuffle, _>,
   6373                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
   6374 }
   6375 
   6376 let ExeDomain = SSEPackedSingle in
   6377 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
   6378                                avx512vl_i32_info>;
   6379 let ExeDomain = SSEPackedDouble in
   6380 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
   6381                                avx512vl_i64_info>, VEX_W1X;
   6382 
   6383 //===----------------------------------------------------------------------===//
   6384 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
   6385 //===----------------------------------------------------------------------===//
   6386 
        // VPSHUFD/VPSHUFHW/VPSHUFLW take an imm8 selector, so they reuse the
        // shift-by-immediate multiclasses (same reg/imm8 and mem/imm8 operand
        // shapes).  All three share opcode 0x70 and are distinguished by
        // their prefix base classes (BI/XSI/XDI).
   6387 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
   6388                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
   6389                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
   6390 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
   6391                                   X86PShufhw, SchedWriteShuffle>,
   6392                                   EVEX, AVX512XSIi8Base;
   6393 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
   6394                                   X86PShuflw, SchedWriteShuffle>,
   6395                                   EVEX, AVX512XDIi8Base;
   6396 
   6397 //===----------------------------------------------------------------------===//
   6398 // AVX-512 - VPSHUFB
   6399 //===----------------------------------------------------------------------===//
   6400 
   6401 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6402                                X86SchedWriteWidths sched> {
        // Byte shuffles require BWI; the 128/256-bit forms additionally
        // require VLX.  Reuses avx512_var_shift because the two-vector
        // operand shape is identical.
   6403   let Predicates = [HasBWI] in
   6404   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
   6405                               EVEX_V512;
   6406 
   6407   let Predicates = [HasVLX, HasBWI] in {
   6408   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
   6409                               EVEX_V256;
   6410   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
   6411                               EVEX_V128;
   6412   }
   6413 }
   6414 
   6415 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
   6416                                   SchedWriteVarShuffle>, VEX_WIG;
   6417 
   6418 //===----------------------------------------------------------------------===//
   6419 // Move Low to High and High to Low packed FP Instructions
   6420 //===----------------------------------------------------------------------===//
   6421 
        // Register-only EVEX forms of MOVLHPS/MOVHLPS (no memory variants
        // here; the memory encodings belong to MOVLPS/MOVHPS below).
   6422 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
   6423           (ins VR128X:$src1, VR128X:$src2),
   6424           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
   6425           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
   6426           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
        // NotMemoryFoldable: keeps the folder from turning this reg-reg form
        // into the 0x12 memory encoding, which is VMOVLPS.
   6427 let isCommutable = 1 in
   6428 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
   6429           (ins VR128X:$src1, VR128X:$src2),
   6430           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
   6431           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
   6432           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
   6433 
   6434 //===----------------------------------------------------------------------===//
   6435 // VMOVHPS/PD VMOVLPS Instructions
   6436 // All patterns was taken from SSS implementation.
   6437 //===----------------------------------------------------------------------===//
   6438 
   6439 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
   6440                                   SDPatternOperator OpNode,
   6441                                   X86VectorVTInfo _> {
        // Load form only: reads one f64 (64 bits) from memory and combines
        // it with $src1 via OpNode.  OpNode may be null_frag, in which case
        // the instruction is defined with no selection pattern (see the
        // comment below about MOVLPS/MOVHPS).
   6442   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
   6443   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
   6444                   (ins _.RC:$src1, f64mem:$src2),
   6445                   !strconcat(OpcodeStr,
   6446                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
   6447                   [(set _.RC:$dst,
   6448                      (OpNode _.RC:$src1,
   6449                        (_.VT (bitconvert
   6450                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
   6451                   Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
   6452 }
   6453 
   6454 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
   6455 // SSE1. And MOVLPS pattern is even more complex.
   6456 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
   6457                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
   6458 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
   6459                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
   6460 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
   6461                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
   6462 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
   6463                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
   6464 
   6465 let Predicates = [HasAVX512] in {
   6466   // VMOVHPD patterns
        // Select an unpckl whose second operand is a 64-bit integer scalar
        // load (bitcast to v2f64) as VMOVHPD's load form.
   6467   def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
   6468                     (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
   6469            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
   6470 }
   6471 
        // Store forms: write a single f64 half of the source register.
        // VMOVHP* store the upper half (modeled as extracting element 0 of
        // an unpckh of the source with itself); VMOVLP* store element 0
        // directly.
   6472 let SchedRW = [WriteFStore] in {
   6473 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
   6474                        (ins f64mem:$dst, VR128X:$src),
   6475                        "vmovhps\t{$src, $dst|$dst, $src}",
   6476                        [(store (f64 (extractelt
   6477                                      (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
   6478                                                 (bc_v2f64 (v4f32 VR128X:$src))),
   6479                                      (iPTR 0))), addr:$dst)]>,
   6480                        EVEX, EVEX_CD8<32, CD8VT2>;
   6481 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
   6482                        (ins f64mem:$dst, VR128X:$src),
   6483                        "vmovhpd\t{$src, $dst|$dst, $src}",
   6484                        [(store (f64 (extractelt
   6485                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
   6486                                      (iPTR 0))), addr:$dst)]>,
   6487                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
   6488 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
   6489                        (ins f64mem:$dst, VR128X:$src),
   6490                        "vmovlps\t{$src, $dst|$dst, $src}",
   6491                        [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
   6492                                      (iPTR 0))), addr:$dst)]>,
   6493                        EVEX, EVEX_CD8<32, CD8VT2>;
   6494 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
   6495                        (ins f64mem:$dst, VR128X:$src),
   6496                        "vmovlpd\t{$src, $dst|$dst, $src}",
   6497                        [(store (f64 (extractelt (v2f64 VR128X:$src),
   6498                                      (iPTR 0))), addr:$dst)]>,
   6499                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
   6500 } // SchedRW
   6501 
   6502 let Predicates = [HasAVX512] in {
   6503   // VMOVHPD patterns
        // Storing element 0 of (vpermilpd $src, 1) is storing the upper
        // f64 of $src -- select it as the VMOVHPD store.
   6504   def : Pat<(store (f64 (extractelt
   6505                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
   6506                            (iPTR 0))), addr:$dst),
   6507            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
   6508 }
   6509 //===----------------------------------------------------------------------===//
   6510 // FMA - Fused Multiply Operations
   6511 //
   6512 
   6513 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6514                                X86FoldableSchedWrite sched,
   6515                                X86VectorVTInfo _, string Suff> {
        // Packed FMA, "213" operand order: dst = OpNode(src2, src1, src3),
        // with $src1 tied to $dst (destructive 3-operand form).
        //   r  - both sources in registers,
        //   m  - $src3 loaded from memory,
        //   mb - $src3 broadcast from a scalar memory location (EVEX.b).
   6516   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
   6517   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
   6518           (ins _.RC:$src2, _.RC:$src3),
   6519           OpcodeStr, "$src3, $src2", "$src2, $src3",
   6520           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
   6521           AVX512FMA3Base, Sched<[sched]>;
   6522 
   6523   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
   6524           (ins _.RC:$src2, _.MemOp:$src3),
   6525           OpcodeStr, "$src3, $src2", "$src2, $src3",
   6526           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
   6527           AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
   6528 
   6529   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
   6530             (ins _.RC:$src2, _.ScalarMemOp:$src3),
   6531             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
   6532             !strconcat("$src2, ${src3}", _.BroadcastStr ),
   6533             (OpNode _.RC:$src2,
   6534              _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
   6535              AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
   6536   }
   6537 }
   6538 
   6539 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6540                                  X86FoldableSchedWrite sched,
   6541                                  X86VectorVTInfo _, string Suff> {
        // Register-only variant with an explicit static rounding-control
        // operand ($rc); EVEX.b in the reg-reg context selects rounding.
   6542   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
   6543   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
   6544           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
   6545           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
   6546           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
   6547           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
   6548 }
   6549 
   6550 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6551                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
   6552                                    AVX512VLVectorVTInfo _, string Suff> {
        // 512-bit forms (plus the rounding variant) under AVX512F; 128/256-bit
        // forms only with VLX.  Rounding control exists only at 512 bits.
   6553   let Predicates = [HasAVX512] in {
   6554     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
   6555                                       _.info512, Suff>,
   6556                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
   6557                                         _.info512, Suff>,
   6558                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
   6559   }
   6560   let Predicates = [HasVLX, HasAVX512] in {
   6561     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
   6562                                     _.info256, Suff>,
   6563                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
   6564     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
   6565                                     _.info128, Suff>,
   6566                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
   6567   }
   6568 }
   6569 
   6570 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6571                               SDNode OpNodeRnd> {
        // Expands one 213-form FMA mnemonic into its PS (f32) and PD (f64)
        // families; VEX_W distinguishes the PD encodings.
   6572     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
   6573                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
   6574     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
   6575                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
   6576                                       VEX_W;
   6577 }
   6578 
   6579 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
   6580 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
   6581 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
   6582 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
   6583 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
   6584 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
   6585 
   6586 
   6587 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6588                                X86FoldableSchedWrite sched,
   6589                                X86VectorVTInfo _, string Suff> {
        // Packed FMA, "231" operand order: dst = OpNode(src2, src3, src1),
        // with $src1 tied to $dst.  Same r/m/mb variant scheme as the 213
        // multiclass above.
   6590   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
   6591   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
   6592           (ins _.RC:$src2, _.RC:$src3),
   6593           OpcodeStr, "$src3, $src2", "$src2, $src3",
   6594           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
   6595           vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
   6596 
   6597   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
   6598           (ins _.RC:$src2, _.MemOp:$src3),
   6599           OpcodeStr, "$src3, $src2", "$src2, $src3",
   6600           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
   6601           AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
   6602 
   6603   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
   6604          (ins _.RC:$src2, _.ScalarMemOp:$src3),
   6605          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
   6606          "$src2, ${src3}"##_.BroadcastStr,
   6607          (_.VT (OpNode _.RC:$src2,
   6608                       (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
   6609                       _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
   6610          Sched<[sched.Folded, ReadAfterLd]>;
   6611   }
   6612 }
   6613 
   6614 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6615                                  X86FoldableSchedWrite sched,
   6616                                  X86VectorVTInfo _, string Suff> {
        // 231-order register-only variant with static rounding control.
   6617   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
   6618   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
   6619           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
   6620           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
   6621           (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
   6622           1, 1, vselect, 1>,
   6623           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
   6624 }
   6625 
   6626 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6627                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
   6628                                    AVX512VLVectorVTInfo _, string Suff> {
        // 512-bit (plus rounding) under AVX512F; 128/256-bit under VLX.
   6629   let Predicates = [HasAVX512] in {
   6630     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
   6631                                       _.info512, Suff>,
   6632                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
   6633                                         _.info512, Suff>,
   6634                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
   6635   }
   6636   let Predicates = [HasVLX, HasAVX512] in {
   6637     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
   6638                                     _.info256, Suff>,
   6639                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
   6640     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
   6641                                     _.info128, Suff>,
   6642                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
   6643   }
   6644 }
   6645 
   6646 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6647                               SDNode OpNodeRnd > {
        // PS (f32) and PD (f64, VEX_W) families for one 231-form mnemonic.
   6648     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
   6649                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
   6650     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
   6651                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
   6652                                       VEX_W;
   6653 }
   6654 
   6655 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
   6656 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
   6657 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
   6658 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
   6659 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
   6660 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
   6661 
   6662 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6663                                X86FoldableSchedWrite sched,
   6664                                X86VectorVTInfo _, string Suff> {
        // Packed FMA, "132" operand order: dst = OpNode(src1, src3, src2),
        // with $src1 tied to $dst.  The memory/broadcast patterns below are
        // intentionally written in 312 order (see the in-line comments).
   6665   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
   6666   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
   6667           (ins _.RC:$src2, _.RC:$src3),
   6668           OpcodeStr, "$src3, $src2", "$src2, $src3",
   6669           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
   6670           AVX512FMA3Base, Sched<[sched]>;
   6671 
   6672   // Pattern is 312 order so that the load is in a different place from the
   6673   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
   6674   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
   6675           (ins _.RC:$src2, _.MemOp:$src3),
   6676           OpcodeStr, "$src3, $src2", "$src2, $src3",
   6677           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
   6678           AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
   6679 
   6680   // Pattern is 312 order so that the load is in a different place from the
   6681   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
   6682   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
   6683          (ins _.RC:$src2, _.ScalarMemOp:$src3),
   6684          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
   6685          "$src2, ${src3}"##_.BroadcastStr,
   6686          (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
   6687                        _.RC:$src1, _.RC:$src2)), 1, 0>,
   6688          AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
   6689   }
   6690 }
   6691 
   6692 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6693                                  X86FoldableSchedWrite sched,
   6694                                  X86VectorVTInfo _, string Suff> {
        // 132-order register-only variant with static rounding control.
   6695   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
   6696   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
   6697           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
   6698           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
   6699           (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
   6700           1, 1, vselect, 1>,
   6701           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
   6702 }
   6703 
   6704 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6705                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
   6706                                    AVX512VLVectorVTInfo _, string Suff> {
        // 512-bit (plus rounding) under AVX512F; 128/256-bit under VLX.
   6707   let Predicates = [HasAVX512] in {
   6708     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
   6709                                       _.info512, Suff>,
   6710                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
   6711                                         _.info512, Suff>,
   6712                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
   6713   }
   6714   let Predicates = [HasVLX, HasAVX512] in {
   6715     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
   6716                                     _.info256, Suff>,
   6717                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
   6718     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
   6719                                     _.info128, Suff>,
   6720                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
   6721   }
   6722 }
   6723 
   6724 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
   6725                               SDNode OpNodeRnd > {
        // PS (f32) and PD (f64, VEX_W) families for one 132-form mnemonic.
   6726     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
   6727                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
   6728     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
   6729                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
   6730                                       VEX_W;
   6731 }
   6732 
   6733 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
   6734 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
   6735 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
   6736 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
   6737 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
   6738 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
   6739 
   6740 // Scalar FMA
   6741 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
   6742                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
        // Scalar FMA skeleton.  The *_Int forms operate on full XMM registers
        // and support masking; they carry no patterns here (null_frag) and
        // are presumably selected by the scalar-FMA Pat<> helpers elsewhere
        // in this file -- confirm against avx512_scalar_fma_patterns.
        // The isCodeGenOnly forms use scalar FP registers (_.FRC) and carry
        // the caller-supplied RHS_* patterns; when MaskOnlyReg is set the
        // reg-reg (r/rb) forms get no pattern at all.
   6743 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
   6744   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   6745           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
   6746           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
   6747           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
   6748 
   6749   let mayLoad = 1 in
   6750   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
   6751           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
   6752           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
   6753           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
   6754 
        // Static rounding-control variant ($rc operand, EVEX.b + EVEX_RC).
   6755   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   6756          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
   6757          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
   6758          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
   6759 
   6760   let isCodeGenOnly = 1, isCommutable = 1 in {
   6761     def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
   6762                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
   6763                      !strconcat(OpcodeStr,
   6764                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
   6765                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
   6766     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
   6767                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
   6768                     !strconcat(OpcodeStr,
   6769                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
   6770                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
   6771 
   6772     def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
   6773                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
   6774                      !strconcat(OpcodeStr,
   6775                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
   6776                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
   6777                      Sched<[SchedWriteFMA.Scl]>;
   6778   }// isCodeGenOnly = 1
   6779 }// Constraints = "$src1 = $dst"
   6780 }
   6781 
   6782 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
   6783                             string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
   6784                             X86VectorVTInfo _, string SUFF> {
        // Instantiates all three operand orderings (213/231/132) of one
        // scalar FMA mnemonic.  The RHS_r/RHS_m/RHS_b dags passed down
        // encode each form's operand order; only the 213 form keeps a
        // reg-reg pattern (MaskOnlyReg = 0).
   6785   let ExeDomain = _.ExeDomain in {
   6786   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
   6787                 // Operands for intrinsic are in 123 order to preserve passthu
   6788                 // semantics.
   6789                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
   6790                          _.FRC:$src3))),
   6791                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
   6792                          (_.ScalarLdFrag addr:$src3)))),
   6793                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
   6794                          _.FRC:$src3, (i32 imm:$rc)))), 0>;
   6795 
   6796   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
   6797                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
   6798                                           _.FRC:$src1))),
   6799                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
   6800                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
   6801                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
   6802                          _.FRC:$src1, (i32 imm:$rc)))), 1>;
   6803 
   6804   // One pattern is 312 order so that the load is in a different place from the
   6805   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
   6806   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
   6807                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
   6808                          _.FRC:$src2))),
   6809                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
   6810                                  _.FRC:$src1, _.FRC:$src2))),
   6811                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
   6812                          _.FRC:$src2, (i32 imm:$rc)))), 1>;
   6813   }
   6814 }
   6815 
   6816 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
   6817                         string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
        // SS (f32) and SD (f64, VEX_W) scalar families; VEX_LIG because the
        // vector-length bits are ignored for scalar operations.
   6818   let Predicates = [HasAVX512] in {
   6819     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
   6820                                  OpNodeRnd, f32x_info, "SS">,
   6821                                  EVEX_CD8<32, CD8VT1>, VEX_LIG;
   6822     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
   6823                                  OpNodeRnd, f64x_info, "SD">,
   6824                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
   6825   }
   6826 }
   6827 
   6828 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
   6829 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
   6830 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
   6831 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
   6832 
// ISel patterns selecting the AVX-512 scalar FMA *_Int instructions for a
// scalar FP FMA whose result is blended back into the low element of a vector
// through a scalar move node (Move = X86Movss/X86Movsd).
//   Op     - the plain scalar FMA SDNode (e.g. X86Fmadd)
//   RndOp  - the variant carrying an explicit rounding-mode operand
//   Prefix/Suffix - instruction name pieces, e.g. "VFMADD" + "SS"
//   _      - vector type info (v4f32x_info / v2f64x_info)
//   ZeroFP - PatLeaf matching FP +0.0, used by the zero-masked patterns
// Which of the 213/231/132 forms is selected depends on which FMA operand is
// the low element extracted from $src1 (the tied destination register):
//   213: multiplicand 2 comes from $src1's low lane
//   231: the addend comes from $src1's low lane
//   132: multiplicand 1 comes from $src1's low lane (memory forms only here)
// Scalar FRC operands are moved into VR128X with COPY_TO_REGCLASS because the
// *_Int instructions operate on full XMM registers.
multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // Unmasked, register-register: src1.low is the middle FMA operand -> 213.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    // Unmasked, register-register: src1.low is the addend -> 231.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    // Unmasked, memory addend -> 213 memory form.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Unmasked: src1.low multiplied by a loaded value, $src2 added -> 132.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Unmasked: $src2 * load, src1.low added -> 231 memory form.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Merge-masked patterns (suffix k): the X86selects pass-through value is
    // src1's original low element, i.e. merge into the tied destination.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Zero-masked patterns (suffix kz): the X86selects pass-through is +0.0.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode. Same 213/231 operand-placement logic as
    // above, but matching RndOp with an explicit (i32 imm:$rc) rounding
    // control, selecting the Zrb_* (embedded-rounding) instruction forms.
    // Register forms only: embedded rounding (EVEX.b) requires reg operands.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    // Merge-masked with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    // Zero-masked with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
  }
}
   7034 
// Instantiate the scalar FMA selection patterns for each of the four FMA
// operation families, once for f32 (SS, movss blend, fp32 zero) and once for
// f64 (SD, movsd blend, fp64 zero).
defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
   7052 
   7053 //===----------------------------------------------------------------------===//
   7054 // AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
   7055 //===----------------------------------------------------------------------===//
// IFMA (integer FMA) building block: defines the register, memory, and
// broadcast-memory forms of one VPMADD52 instruction for a single vector
// width. $src1 is tied to $dst (accumulator operand), as with FP FMA3.
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode have the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  // Register-register form; AVX512_maskable_3src adds the k/kz variants.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
         AVX512FMA3Base, Sched<[sched]>;

  // Full-vector memory operand form.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Broadcast-from-memory form (EVEX.b): one scalar element splatted.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                    _.RC:$src1)>,
            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
} // Constraints = "$src1 = $dst"
   7085 
// Instantiates a VPMADD52 instruction at all three vector widths:
// ZMM under HasIFMA, and YMM/XMM additionally gated on HasVLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
   7099 
// VPMADD52LUQ/VPMADD52HUQ: multiply of unsigned 52-bit integers producing the
// low/high 52 bits, added into the 64-bit accumulator ($src1).
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
   7106 
   7107 //===----------------------------------------------------------------------===//
   7108 // AVX-512  Scalar convert from sign integer to float/double
   7109 //===----------------------------------------------------------------------===//
   7110 
// Scalar int->FP conversion (vcvtsi2ss/sd family), current-rounding forms.
// Defines:
//   rr/rm      - FRC-register forms with no ISel pattern (hasSideEffects = 0,
//                empty pattern list); used for assembly and via explicit Pats.
//   rr_Int/rm_Int - isCodeGenOnly XMM-register intrinsic forms, selected from
//                OpNode with FROUND_CURRENT (current MXCSR rounding).
// $src1 supplies the untouched upper destination elements (EVEX_4V).
multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  } // hasSideEffects = 0
  let isCodeGenOnly = 1 in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 FROUND_CURRENT)))]>,
                 EVEX_4V, Sched<[sched]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                 (ld_frag addr:$src2),
                                 (i32 FROUND_CURRENT)))]>,
                  EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  }//isCodeGenOnly = 1
}
   7145 
// Embedded-rounding (EVEX.b + static RC) register form of scalar int->FP
// conversion; selected from OpNode with an explicit (i32 imm:$rc) operand.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 imm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
   7159 
// Combines the current-rounding forms (avx512_vcvtsi) with the
// embedded-rounding form (avx512_vcvtsi_round) under one name, with VEX_LIG.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm>, VEX_LIG;
}
   7168 
// Signed and unsigned int->FP conversion instruction definitions plus the
// ISel patterns mapping sint_to_fp/uint_to_fp onto the FRC rr/rm forms
// (upper elements filled from an IMPLICIT_DEF). The "att" InstAliases cover
// the memory form spelled without an explicit operand-size suffix.
let Predicates = [HasAVX512] in {
// Signed: GR32/GR64 -> f32/f64.
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
                                 XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
                                 XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

// sint_to_fp with a folded load.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// sint_to_fp from a GPR.
def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned: GR32/GR64 -> f32/f64. Note VCVTUSI2SDZ uses plain avx512_vcvtsi,
// so it gets no embedded-rounding (rrb_Int) form — presumably because the
// i32 -> f64 conversion needs no rounding; confirm against the ISA reference.
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                  v4f32x_info, i32mem, loadi32,
                                  "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
                                  i32mem, loadi32, "cvtusi2sd{l}">,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

// uint_to_fp with a folded load.
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// uint_to_fp from a GPR.
def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
   7242 
   7243 //===----------------------------------------------------------------------===//
   7244 // AVX-512  Scalar convert from float/double to integer
   7245 //===----------------------------------------------------------------------===//
   7246 
// Scalar FP->int conversion with rounding behavior (vcvtss2si family).
// Defines the XMM-source intrinsic forms:
//   rr_Int  - register source, current rounding (FROUND_CURRENT)
//   rrb_Int - register source with embedded static rounding (EVEX.b + $rc)
//   rm_Int  - memory source; hidden from the assembler when CodeGenOnly=1
//             (ForceDisassemble keeps it available to the disassembler)
// plus AT&T InstAliases carrying the size suffix in aliasStr (e.g. "{l}").
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr,
                                  bit CodeGenOnly = 1> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
                      (i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
    def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
            (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  } // Predicates = [HasAVX512]
}
   7276 
// Variant of avx512_cvt_s_int_round with CodeGenOnly=0, i.e. the memory form
// is assembler-visible, and an extra AT&T alias is added for it. Used by the
// cvt*2usi instructions, which have no SSE counterpart claiming the mnemonic.
multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
                                          X86VectorVTInfo DstVT, SDNode OpNode,
                                          X86FoldableSchedWrite sched, string asm,
                                          string aliasStr> :
  avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> {
  let Predicates = [HasAVX512] in {
    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                            SrcVT.IntScalarMemOp:$src), 0, "att">;
  } // Predicates = [HasAVX512]
}
   7288 
// Convert float/double to signed/unsigned int 32/64
// Signed destinations use avx512_cvt_s_int_round (memory form CodeGenOnly,
// since the mnemonic is shared with SSE); unsigned destinations use the
// _aliases variant, which also exposes the memory form to the assembler.
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
   7314 
// The SSE version of these instructions are disabled for AVX512.
// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Each intrinsic gets both a register pattern and a folded-load pattern.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
            (VCVTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
            (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
            (VCVTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
            (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
            (VCVTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
            (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
            (VCVTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
            (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
} // HasAVX512
   7335 
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
// The converted scalar is inserted into the low element of $dst via a
// movss/movsd node; selecting the *_Int conversion form (whose $src1 is the
// XMM destination) performs the insert directly, eliminating the move.
// Register and folded-load variants are provided for each of the eight
// (signed/unsigned) x (GR32/GR64) x (f32/f64) combinations.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
   7419 
// Convert float/double to signed/unsigned int 32/64 with truncation.
// Emits five instruction forms per instantiation:
//   rr/rm      - codegen-only FRC forms matched by OpNode (fp_to_[su]int),
//   rr_Int     - XMM-source intrinsic form with current rounding mode,
//   rrb_Int    - XMM-source form with {sae} (suppress-all-exceptions),
//   rm_Int     - memory intrinsic form; assembler visibility is controlled
//                by the CodeGenOnly template bit (see the unsigned variant),
// plus AT&T-syntax InstAliases carrying the size suffix in aliasStr.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                            string aliasStr, bit CodeGenOnly = 1>{
let Predicates = [HasAVX512] in {
  // Scalar-register forms used for plain (non-intrinsic) codegen.
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, Sched<[sched.Folded, ReadAfterLd]>;
  }

  // Intrinsic form reading the low element of an XMM register, using the
  // current MXCSR rounding mode.
  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
           [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                 (i32 FROUND_CURRENT)))]>,
           EVEX, VEX_LIG, Sched<[sched]>;
  // Same, but with EVEX.b set to request {sae} (no exceptions raised).
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                  (i32 FROUND_NO_EXC)))]>,
                                  EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
  // Memory intrinsic form; hidden from the assembler when CodeGenOnly is set
  // because it is ambiguous with the rm form above.
  let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNodeRnd
                                     (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
                                     (i32 FROUND_CURRENT)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

  // AT&T aliases with an explicit size suffix (e.g. "{l}"/"{q}").
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
} //HasAVX512
}
   7462 
// Unsigned-destination variant: instantiates avx512_cvt_s_all with
// CodeGenOnly = 0 so the rm_Int memory form stays assembler-visible, and
// adds the matching AT&T suffixed alias for that memory form.
multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
                                     X86VectorVTInfo _SrcRC,
                                     X86VectorVTInfo _DstRC, SDNode OpNode,
                                     SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                     string aliasStr> :
  avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched,
                   aliasStr, 0> {
let Predicates = [HasAVX512] in {
  // AT&T alias for the memory intrinsic form (e.g. "vcvttss2usil (%mem)").
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
}
   7476 
// Truncating scalar fp -> signed int converts: ss/sd source x i32/i64 dest.
// The "{l}"/"{q}" strings are the AT&T size suffixes for the aliases above.
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">,
                        VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">,
                        VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// Truncating scalar fp -> unsigned int converts (AVX-512-only encodings).
defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">,
                        XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">,
                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;
   7502 
// Map the SSE/SSE2 cvttss2si/cvttsd2si intrinsics onto the AVX-512 "_Int"
// instruction forms so they use EVEX encodings when AVX-512 is available.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
            (VCVTTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
            (VCVTTSS2SIZrm_Int ssmem:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
            (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
            (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
            (VCVTTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
            (VCVTTSD2SIZrm_Int sdmem:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
            (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
            (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
} // HasAVX512
   7521 
   7522 //===----------------------------------------------------------------------===//
   7523 // AVX-512  Convert form float to double and back
   7524 //===----------------------------------------------------------------------===//
   7525 
// Scalar fp-to-fp convert (e.g. sd2ss / ss2sd). Emits maskable intrinsic
// forms (rr_Int/rm_Int) that merge into $src1 using the current rounding
// mode, plus codegen-only FRC forms (rr/rm) with no pattern, used by
// separate ISel patterns.
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2),
                                       (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.ScalarIntMemCPat:$src2),
                                  (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // Pattern-less scalar-register forms; hasSideEffects = 0 so the scheduler
  // may treat them as plain data ops.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   7557 
// Scalar Conversion with SAE - suppress all exceptions.
// Adds the EVEX.b-encoded {sae} register-register intrinsic form.
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                    X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2),
                                         (i32 FROUND_NO_EXC)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
   7570 
// Scalar Conversion with rounding control (RC).
// Adds the EVEX.b + EVEX.RC form taking an explicit static rounding mode
// operand ($rc).
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// Double -> float scalar convert: base forms plus rounding-control form
// (sd2ss supports static rounding, not just {sae}).
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                  SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                  X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}
   7592 
// Float -> double scalar convert: base forms plus {sae} form (ss2sd is
// exact, so it takes suppress-all-exceptions rather than a rounding mode).
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
// Instantiate the scalar fp-to-fp converts (both use opcode 0x5A).
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
                                         X86froundRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                          X86fpextRnd, WriteCvtSS2SD, f32x_info,
                                          f64x_info>;
   7608 
// Select the codegen-only FRC forms for plain fpextend/fpround. The extra
// operand is a pass-through source register, tied off with IMPLICIT_DEF.
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
// Fold the load only when optimizing for size; ...
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// ...when optimizing for speed, load separately so the convert has no
// false dependency on the memory operand's address computation.
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
          Requires<[HasAVX512, OptForSpeed]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;

// Blend-into-low-element forms: use the _Int instructions so the upper
// elements of $dst are preserved in one instruction.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
   7641 
   7642 //===----------------------------------------------------------------------===//
   7643 // AVX-512  Vector convert from signed/unsigned integer to float/double
   7644 //          and from float/double to signed/unsigned integer
   7645 //===----------------------------------------------------------------------===//
   7646 
// Base multiclass for vector converts: maskable register (rr), full-vector
// memory (rm), and broadcast memory (rmb) forms. Broadcast and Alias
// strings, and the memory operand, are overridable for the cases where the
// source element count differs from the destination's (e.g. "{1to2}").
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
                         EVEX, Sched<[sched]>;

  // Alias is appended to the mnemonic (e.g. "x"/"y") where the memory form
  // would otherwise be ambiguous to the assembler.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
                         (_.VT (OpNode (_Src.VT
                             (bitconvert (_Src.LdFrag addr:$src)))))>,
                         EVEX, Sched<[sched.Folded]>;

  // EVEX.b broadcast-from-scalar-memory form.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                            ))>, EVEX, EVEX_B,
                         Sched<[sched.Folded]>;
}
// Conversion with SAE - suppress all exceptions.
// Adds the EVEX.b-encoded {sae} register form for vector converts.
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeRnd,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
                               (i32 FROUND_NO_EXC)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}
   7683 
// Conversion with rounding control (RC).
// Adds the EVEX.b + EVEX.RC register form taking a static rounding mode.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86VectorVTInfo _Src, SDNode OpNodeRnd,
                         X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
   7694 
// Extend Float to Double.
// ZMM form supports {sae}; the 128-bit form reads only the low two f32s,
// hence the explicit "{1to2}" broadcast string and f64mem operand.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                               sched.YMM>, EVEX_V256;
  }
}
   7711 
// Truncate Double to Float.
// ZMM form supports rounding control. The 128- and 256-bit forms share the
// v4f32 destination, so "x"/"y" mnemonic suffixes (plus InstAliases for the
// unambiguous register/Intel-syntax spellings) disambiguate them.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
  }
}
   7735 
// Instantiate the packed fp-to-fp converts (both opcode 0x5A, distinguished
// by prefix: PD for pd2ps, PS for ps2pd).
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                  PS, EVEX_CD8<32, CD8VH>;

// Fold an extending vector load directly into the convert.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;

let Predicates = [HasVLX] in {
  // vcvtpd2ps on a 128-bit source already zeroes the upper half of the
  // result, so an explicit zero-move of the low f64 lane is redundant.
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
            (VCVTPD2PSZ128rm addr:$src)>;
  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
              (VCVTPS2PDZ128rm addr:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
              (VCVTPS2PDZ256rm addr:$src)>;
}
   7756 
// Convert Signed/Unsigned Doubleword to Double.
// Int->f64 widening is exact, so no rounding/SAE variants are emitted.
// The 128-bit form reads only the low two i32s (OpNode128, "{1to2}", i64mem).
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
   7772 
// Convert Signed/Unsigned Doubleword to Float.
// ZMM form additionally gets the rounding-control variant.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
   7789 
// Convert Float to Signed/Unsigned Doubleword with truncation.
// Truncating converts take {sae} (no rounding mode to choose).
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
   7806 
// Convert Float to Signed/Unsigned Doubleword.
// Non-truncating, so the ZMM form gets rounding control instead of {sae}.
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
   7823 
// Convert Double to Signed/Unsigned Doubleword with truncation.
// ZMM form takes {sae}; 128/256-bit forms need "x"/"y" suffixes (see below).
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
  }
}
   7853 
// Convert Double to Signed/Unsigned Doubleword.
// Non-truncating variant of the above: rounding control instead of {sae}.
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
  }
}
   7883 
// Convert Double to Signed/Unsigned Quadword.
// Requires AVX512DQ; ZMM form gets rounding control.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
   7900 
// Convert Double to Signed/Unsigned Quadword with truncation.
// Requires AVX512DQ; truncating, so the ZMM form takes {sae}.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
   7917 
// Convert Signed/Unsigned Quadword to Double.
// Requires AVX512DQ. The VLX forms are marked NotEVEX2VEXConvertible since
// they have no VEX-encoded equivalent.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}
   7934 
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // The 512-bit form (v8f32 source -> v8i64 dest) requires only AVX512DQ and
  // additionally gets an embedded rounding-control (RC) variant.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source. The memory operand is f64mem (two f32
    // elements) for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
   7953 
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // Truncating conversion: the 512-bit form gets a {sae} variant instead of
  // embedded rounding control.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source. The memory operand is f64mem (two f32
    // elements) for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
   7971 
// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // The 512-bit form requires only AVX512DQ and additionally gets an
  // embedded rounding-control (RC) variant.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128,
                               NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;

    // Assembler aliases: accept the suffixed mnemonic for the register forms
    // and for the Intel-syntax memory forms.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
  }
}
   8004 
// Instantiations of the conversion multiclasses above.
// Signed/unsigned dword -> packed fp.
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                PS, EVEX_CD8<32, CD8VF>;

// Truncating fp -> signed/unsigned dword.
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
                                XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                 EVEX_CD8<32, CD8VF>;

// Rounding fp -> signed/unsigned dword (uses current rounding mode).
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PS, EVEX_CD8<64, CD8VF>;

// fp <-> signed/unsigned qword (gated on AVX512DQ inside the multiclasses).
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

// Signed/unsigned qword -> packed fp (AVX512DQ).
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                            EVEX_CD8<64, CD8VF>;
   8099 
// Select the truncating conversion instructions for generic
// fp_to_sint/fp_to_uint on 512-bit vectors (register and load forms).
let Predicates = [HasAVX512] in  {
  def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
            (VCVTTPS2DQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
            (VCVTTPS2DQZrm addr:$src)>;

  def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
            (VCVTTPS2UDQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
            (VCVTTPS2UDQZrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2DQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2DQZrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UDQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UDQZrm addr:$src)>;
}
   8121 
// Same as above for the 128/256-bit forms, which require AVX512VL.
let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2DQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2DQZ128rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UDQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UDQZ128rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2DQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2DQZ256rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UDQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UDQZ256rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2DQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQZ256rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UDQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UDQZ256rm addr:$src)>;
}
   8153 
// qword-result truncating conversions; the 512-bit forms need only AVX512DQ.
let Predicates = [HasDQI] in {
  def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2QQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UQQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UQQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2QQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UQQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UQQZrm addr:$src)>;
}
   8175 
// 128/256-bit qword-result truncating conversions require AVX512DQ+AVX512VL.
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2QQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UQQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UQQZ256rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
            (VCVTTPD2QQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
            (VCVTTPD2QQZ128rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
            (VCVTTPD2UQQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
            (VCVTTPD2UQQZ128rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2QQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UQQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UQQZ256rm addr:$src)>;
}
   8207 
// Without AVX512VL, the 128/256-bit conversions that have no VEX encoding
// are emulated by widening the operand into a ZMM register (insert into
// IMPLICIT_DEF), running the 512-bit instruction, and extracting the
// low subregister of the result.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
   8244 
let Predicates = [HasAVX512, HasVLX] in {
  // The 128-bit pd->dq conversions naturally zero the upper half of the
  // destination, so a following "zero upper elements" (X86vzmovl) is
  // redundant and can be folded away.
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;

  // Fold a scalar-i64 (two dwords) load feeding the 128-bit dq->pd
  // conversions into the rm form.
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}
   8275 
// Fold loads into the 512-bit fp round/extend conversions.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
   8282 
// The 128-bit qq->ps conversions zero the upper half of the destination,
// so a trailing X86vzmovl is redundant and folds away.
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                              (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                              (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
}
   8291 
// With AVX512DQ but no AVX512VL, emulate the 128/256-bit qword conversions
// by widening into a ZMM register, using the 512-bit instruction, and
// extracting the low subregister of the result.
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
   8353 
   8354 //===----------------------------------------------------------------------===//
   8355 // Half precision conversion instructions
   8356 //===----------------------------------------------------------------------===//
   8357 
// vcvtph2ps: half -> single conversion, register and memory source forms,
// with full AVX-512 masking support.
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (bitconvert
                                           (ld_frag addr:$src))))>,
                            T8PD, Sched<[sched.Folded]>;
}
   8372 
// {sae} (suppress-all-exceptions) register form of vcvtph2ps, selected via
// the EVEX.b bit.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}
   8382 
// 512-bit vcvtph2ps; the SAE form is only available at 512 bits.
let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
   8388 
// 128/256-bit vcvtph2ps forms (require AVX512VL).
let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                       loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                       loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
   8406 
// vcvtps2ph: single -> half conversion with an immediate rounding-control
// operand ($src2). Register form is maskable; the store forms are
// pattern-less (hasSideEffects = 0, mayStore = 1) and matched separately.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                   (X86cvtps2ph (_src.VT _src.RC:$src1),
                                (i32 imm:$src2)), 0, 0>,
                   AVX512AIi8Base, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    // Masked store form; not a candidate for memory-operand folding.
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
   8426 
// {sae} register form of vcvtps2ph, assembler-only (no selection pattern).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
   8436 
let Predicates = [HasAVX512] in {
  // 512-bit vcvtps2ph (with SAE form), plus 128/256-bit forms under VLX.
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  // Match stores of the conversion result to select the mr (store) forms.
  // The 128-bit result is 64 bits wide, stored via an extracted f64/i64.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}
   8464 
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100 (the immediate 4 below).
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;

  // Round-trip f32 -> f16 -> f32 lowered as convert-down then convert-up.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (VCVTPS2PHZ128rr
               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
   8485 
//  Unordered/Ordered scalar fp compare with SAE and set EFLAGS
// Scalar compare with {sae} (EVEX.b set); assembler-only form with no
// selection pattern (hasSideEffects = 0, empty pattern list).
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                            string OpcodeStr, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
   8494 
// {sae} variants of the scalar ordered/unordered compares; all write EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
   8505 
   8506 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
          // Non-SAE forms, reusing the SSE multiclasses with EVEX encodings.
          // VUCOMIS* carry an X86cmp ISel pattern.
   8507   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
   8508                                  "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
   8509                                  EVEX_CD8<32, CD8VT1>;
   8510   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
   8511                                   "ucomisd", WriteFCom>, PD, EVEX,
   8512                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
          // VCOMIS* get no ISel pattern (node is 'undef' and Pattern is cleared);
          // they exist for the assembler/disassembler and intrinsics below.
   8513   let Pattern = []<dag> in {
   8514     defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
   8515                                    "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
   8516                                    EVEX_CD8<32, CD8VT1>;
   8517     defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
   8518                                    "comisd", WriteFCom>, PD, EVEX,
   8519                                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
   8520   }
          // Intrinsic forms taking full 128-bit vector operands; codegen-only
          // so they never collide with the assembly-visible defs above.
   8521   let isCodeGenOnly = 1 in {
   8522     defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
   8523                           sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
   8524                           EVEX_CD8<32, CD8VT1>;
   8525     defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
   8526                           sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
   8527                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
   8528 
   8529     defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
   8530                           sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
   8531                           EVEX_CD8<32, CD8VT1>;
   8532     defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
   8533                           sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
   8534                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
   8535   }
   8536 }
   8537 
   8538 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
   8539 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
   8540                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
   8541   let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
          // reg-reg form.
   8542   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8543                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
   8544                            "$src2, $src1", "$src1, $src2",
   8545                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
   8546                            EVEX_4V, Sched<[sched]>;
          // reg-mem form; src2 is a scalar (intrinsic-style) memory operand.
   8547   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8548                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
   8549                          "$src2, $src1", "$src1, $src2",
   8550                          (OpNode (_.VT _.RC:$src1),
   8551                           _.ScalarIntMemCPat:$src2)>, EVEX_4V,
   8552                           Sched<[sched.Folded, ReadAfterLd]>;
   8553 }
   8554 }
   8555 
   8556 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
   8557                                f32x_info>, EVEX_CD8<32, CD8VT1>,
   8558                                T8PD;
   8559 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
   8560                                f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
   8561                                T8PD;
        // Reciprocal-square-root estimates share the multiclass; only the
        // opcode (0x4F vs 0x4D), node and scheduling class differ.
   8562 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
   8563                                  SchedWriteFRsqrt.Scl, f32x_info>,
   8564                                  EVEX_CD8<32, CD8VT1>, T8PD;
   8565 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
   8566                                  SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
   8567                                  EVEX_CD8<64, CD8VT1>, T8PD;
   8568 
   8569 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
   8570 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
   8571                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
   8572   let ExeDomain = _.ExeDomain in {
          // reg form.
   8573   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8574                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
   8575                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
   8576                          Sched<[sched]>;
          // full-vector load form.
   8577   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8578                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
   8579                          (OpNode (_.VT
   8580                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
   8581                          Sched<[sched.Folded, ReadAfterLd]>;
          // broadcast-load form (EVEX.b): scalar element splatted to the vector.
   8582   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8583                           (ins _.ScalarMemOp:$src), OpcodeStr,
   8584                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
   8585                           (OpNode (_.VT
   8586                             (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
   8587                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
   8588   }
   8589 }
   8590 
   8591 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
   8592                                 X86SchedWriteWidths sched> {
          // 512-bit forms are always available with the base predicate.
   8593   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
   8594                            v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
   8595   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
   8596                            v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
   8597 
   8598   // Define only if AVX512VL feature is present.
   8599   let Predicates = [HasVLX] in {
   8600     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
   8601                                 OpNode, sched.XMM, v4f32x_info>,
   8602                                EVEX_V128, EVEX_CD8<32, CD8VF>;
   8603     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
   8604                                 OpNode, sched.YMM, v8f32x_info>,
   8605                                EVEX_V256, EVEX_CD8<32, CD8VF>;
   8606     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
   8607                                 OpNode, sched.XMM, v2f64x_info>,
   8608                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
   8609     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
   8610                                 OpNode, sched.YMM, v4f64x_info>,
   8611                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
   8612   }
   8613 }
   8614 
   8615 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
        // Packed 14-bit-precision reciprocal estimate (all vector widths).
   8616 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
   8617 
   8618 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
   8619 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
   8620                          SDNode OpNode, X86FoldableSchedWrite sched> {
   8621   let ExeDomain = _.ExeDomain in {
          // reg-reg form using the current MXCSR rounding mode.
   8622   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8623                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
   8624                            "$src2, $src1", "$src1, $src2",
   8625                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
   8626                            (i32 FROUND_CURRENT))>,
   8627                            Sched<[sched]>;
   8628 
          // {sae} form: EVEX.b suppresses exceptions (FROUND_NO_EXC).
   8629   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8630                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
   8631                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
   8632                             (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
   8633                             (i32 FROUND_NO_EXC))>, EVEX_B,
   8634                             Sched<[sched]>;
   8635 
          // reg-mem form; src2 is a scalar (intrinsic-style) memory operand.
   8636   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8637                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
   8638                          "$src2, $src1", "$src1, $src2",
   8639                          (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
   8640                          (i32 FROUND_CURRENT))>,
   8641                          Sched<[sched.Folded, ReadAfterLd]>;
   8642   }
   8643 }
   8644 
   8645 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
   8646                         X86FoldableSchedWrite sched> {
          // Instantiate the f32 (ss) and f64 (sd) scalar variants.
   8647   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
   8648                EVEX_CD8<32, CD8VT1>;
   8649   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
   8650                EVEX_CD8<64, CD8VT1>, VEX_W;
   8651 }
   8652 
   8653 let Predicates = [HasERI] in {
          // 28-bit-precision scalar estimates require the ERI feature.
   8654   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
   8655                               T8PD, EVEX_4V;
   8656   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
   8657                                SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
   8658 }
   8659 
        // VGETEXP reuses the same scalar shape but is plain AVX512 (no HasERI).
   8660 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
   8661                               SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
   8662 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
   8663 
   8664 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
   8665                          SDNode OpNode, X86FoldableSchedWrite sched> {
   8666   let ExeDomain = _.ExeDomain in {
          // reg form with current MXCSR rounding.
   8667   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8668                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
   8669                          (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
   8670                          Sched<[sched]>;
   8671 
          // full-vector load form.
   8672   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8673                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
   8674                          (OpNode (_.VT
   8675                              (bitconvert (_.LdFrag addr:$src))),
   8676                           (i32 FROUND_CURRENT))>,
   8677                           Sched<[sched.Folded, ReadAfterLd]>;
   8678 
          // broadcast-load form (EVEX.b as broadcast on memory operands).
   8679   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8680                          (ins _.ScalarMemOp:$src), OpcodeStr,
   8681                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
   8682                          (OpNode (_.VT
   8683                                   (X86VBroadcast (_.ScalarLdFrag addr:$src))),
   8684                                  (i32 FROUND_CURRENT))>, EVEX_B,
   8685                          Sched<[sched.Folded, ReadAfterLd]>;
   8686   }
   8687 }
   8688 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
   8689                          SDNode OpNode, X86FoldableSchedWrite sched> {
          // {sae} register form: EVEX.b on reg-reg means suppress-all-exceptions
          // (FROUND_NO_EXC), complementing the broadcast meaning in avx512_fp28_p.
   8690   let ExeDomain = _.ExeDomain in
   8691   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8692                         (ins _.RC:$src), OpcodeStr,
   8693                         "{sae}, $src", "$src, {sae}",
   8694                         (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
   8695                         EVEX_B, Sched<[sched]>;
   8696 }
   8697 
   8698 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
   8699                        X86SchedWriteWidths sched> {
          // ERI packed ops only exist at 512 bits: plain + {sae} forms each.
   8700    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
   8701               avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
   8702               T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
   8703    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
   8704               avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
   8705               T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
   8706 }
   8707 
   8708 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
   8709                                   SDNode OpNode, X86SchedWriteWidths sched> {
   8710   // Define only if AVX512VL feature is present.
   8711   let Predicates = [HasVLX] in {
          // 128/256-bit companions for ops (e.g. VGETEXP) whose 512-bit forms
          // come from avx512_eri; no {sae} variants at these widths.
   8712     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
   8713                                      EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
   8714     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
   8715                                      EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
   8716     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
   8717                                      EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
   8718     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
   8719                                      EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
   8720   }
   8721 }
   8722 
   8723 let Predicates = [HasERI] in {
         // 512-bit-only ERI packed estimates.
   8724  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
   8725  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
   8726  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
   8727 }
        // VGETEXP: 512-bit forms via avx512_eri plus VL (128/256) forms via
        // avx512_fp_unaryop_packed; not gated on HasERI.
   8728 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
   8729                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
   8730                                           SchedWriteFRnd>, EVEX;
   8731 
   8732 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
   8733                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
          // Embedded-rounding form: AVX512RC:$rc carries the static rounding
          // mode, selected via EVEX.b + EVEX_RC on a reg-reg encoding.
   8734   let ExeDomain = _.ExeDomain in
   8735   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8736                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
   8737                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
   8738                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
   8739 }
   8740 
   8741 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
   8742                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
   8743   let ExeDomain = _.ExeDomain in {
          // reg form matching the generic fsqrt node.
   8744   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8745                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
   8746                          (_.VT (fsqrt _.RC:$src))>, EVEX,
   8747                          Sched<[sched]>;
          // full-vector load form.
   8748   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8749                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
   8750                          (fsqrt (_.VT
   8751                            (bitconvert (_.LdFrag addr:$src))))>, EVEX,
   8752                            Sched<[sched.Folded, ReadAfterLd]>;
          // broadcast-load form (EVEX.b).
   8753   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8754                           (ins _.ScalarMemOp:$src), OpcodeStr,
   8755                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
   8756                           (fsqrt (_.VT
   8757                             (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
   8758                           EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
   8759   }
   8760 }
   8761 
   8762 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
   8763                                   X86SchedWriteSizes sched> {
          // 512-bit forms, unconditionally available.
   8764   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
   8765                                 sched.PS.ZMM, v16f32_info>,
   8766                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
   8767   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
   8768                                 sched.PD.ZMM, v8f64_info>,
   8769                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
   8770   // Define only if AVX512VL feature is present.
   8771   let Predicates = [HasVLX] in {
   8772     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
   8773                                      sched.PS.XMM, v4f32x_info>,
   8774                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
   8775     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
   8776                                      sched.PS.YMM, v8f32x_info>,
   8777                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
   8778     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
   8779                                      sched.PD.XMM, v2f64x_info>,
   8780                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
   8781     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
   8782                                      sched.PD.YMM, v4f64x_info>,
   8783                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
   8784   }
   8785 }
   8786 
   8787 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
   8788                                         X86SchedWriteSizes sched> {
          // Embedded-rounding variants exist only at 512 bits.
   8789   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
   8790                                       sched.PS.ZMM, v16f32_info>,
   8791                                       EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
   8792   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
   8793                                       sched.PD.ZMM, v8f64_info>,
   8794                                       EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
   8795 }
   8796 
   8797 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
   8798                               X86VectorVTInfo _, string Name> {
   8799   let ExeDomain = _.ExeDomain in {
          // Intrinsic reg-reg form, current MXCSR rounding.
   8800     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8801                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
   8802                          "$src2, $src1", "$src1, $src2",
   8803                          (X86fsqrtRnds (_.VT _.RC:$src1),
   8804                                     (_.VT _.RC:$src2),
   8805                                     (i32 FROUND_CURRENT))>,
   8806                          Sched<[sched]>;
          // Intrinsic reg-mem form.
   8807     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8808                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
   8809                          "$src2, $src1", "$src1, $src2",
   8810                          (X86fsqrtRnds (_.VT _.RC:$src1),
   8811                                     _.ScalarIntMemCPat:$src2,
   8812                                     (i32 FROUND_CURRENT))>,
   8813                          Sched<[sched.Folded, ReadAfterLd]>;
          // Embedded-rounding form with an explicit $rc operand.
   8814     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8815                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
   8816                          "$rc, $src2, $src1", "$src1, $src2, $rc",
   8817                          (X86fsqrtRnds (_.VT _.RC:$src1),
   8818                                      (_.VT _.RC:$src2),
   8819                                      (i32 imm:$rc))>,
   8820                          EVEX_B, EVEX_RC, Sched<[sched]>;
   8821 
          // Pattern-less FR register forms; selected by the Pats below.
   8822     let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
   8823       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
   8824                 (ins _.FRC:$src1, _.FRC:$src2),
   8825                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
   8826                 Sched<[sched]>;
   8827       let mayLoad = 1 in
   8828         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
   8829                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
   8830                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
   8831                   Sched<[sched.Folded, ReadAfterLd]>;
   8832     }
   8833   }
   8834 
          // Select the scalar fsqrt node to the codegen-only reg form; $src1
          // (the pass-through) is left undefined.
   8835   let Predicates = [HasAVX512] in {
   8836     def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
   8837               (!cast<Instruction>(Name#Zr)
   8838                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
   8839   }
   8840 
          // Fold the load only when optimizing for size.
   8841   let Predicates = [HasAVX512, OptForSize] in {
   8842     def : Pat<(_.EltVT (fsqrt (load addr:$src))),
   8843               (!cast<Instruction>(Name#Zm)
   8844                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
   8845   }
   8846 }
   8847 
   8848 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
   8849                                   X86SchedWriteSizes sched> {
          // ss/sd instantiations; Name is forwarded for the !cast in the Pats.
   8850   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
   8851                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
   8852   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
   8853                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
   8854 }
   8855 
   8856 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
   8857              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
   8858 
        // Scalar square roots (ss/sd) under the same VSQRT prefix.
   8859 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
   8860 
   8861 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
   8862                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
   8863   let ExeDomain = _.ExeDomain in {
          // Intrinsic reg-reg form; $src3 is the rounding-control immediate.
   8864   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8865                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
   8866                            "$src3, $src2, $src1", "$src1, $src2, $src3",
   8867                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
   8868                            (i32 imm:$src3)))>,
   8869                            Sched<[sched]>;
   8870 
          // {sae} form (FROUND_NO_EXC via EVEX.b).
   8871   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   8872                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
   8873                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
   8874                          (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
   8875                          (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
   8876                          Sched<[sched]>;
   8877 
          // Intrinsic reg-mem form.
   8878   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
   8879                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
   8880                          OpcodeStr,
   8881                          "$src3, $src2, $src1", "$src1, $src2, $src3",
   8882                          (_.VT (X86RndScales _.RC:$src1,
   8883                                 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
   8884                          Sched<[sched.Folded, ReadAfterLd]>;
   8885 
          // Pattern-less FR register forms; selected by the Pats below.
   8886   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
   8887     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
   8888                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
   8889                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
   8890                []>, Sched<[sched]>;
   8891 
   8892     let mayLoad = 1 in
   8893       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
   8894                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
   8895                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
   8896                  []>, Sched<[sched.Folded, ReadAfterLd]>;
   8897   }
   8898   }
   8899 
          // Map the generic rounding nodes to rndscale immediates:
          // 0x9=floor, 0xa=ceil, 0xb=trunc, 0x4=rint, 0xc=nearbyint.
   8900   let Predicates = [HasAVX512] in {
   8901     def : Pat<(ffloor _.FRC:$src),
   8902               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
   8903                _.FRC:$src, (i32 0x9)))>;
   8904     def : Pat<(fceil _.FRC:$src),
   8905               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
   8906                _.FRC:$src, (i32 0xa)))>;
   8907     def : Pat<(ftrunc _.FRC:$src),
   8908               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
   8909                _.FRC:$src, (i32 0xb)))>;
   8910     def : Pat<(frint _.FRC:$src),
   8911               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
   8912                _.FRC:$src, (i32 0x4)))>;
   8913     def : Pat<(fnearbyint _.FRC:$src),
   8914               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
   8915                _.FRC:$src, (i32 0xc)))>;
   8916   }
   8917 
          // Same mapping with a folded load, only when optimizing for size.
   8918   let Predicates = [HasAVX512, OptForSize] in {
   8919     def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
   8920               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
   8921                addr:$src, (i32 0x9)))>;
   8922     def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
   8923               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
   8924                addr:$src, (i32 0xa)))>;
   8925     def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
   8926               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
   8927                addr:$src, (i32 0xb)))>;
   8928     def : Pat<(frint (_.ScalarLdFrag addr:$src)),
   8929               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
   8930                addr:$src, (i32 0x4)))>;
   8931     def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
   8932               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
   8933                addr:$src, (i32 0xc)))>;
   8934   }
   8935 }
   8936 
   8937 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
   8938                                            SchedWriteFRnd.Scl, f32x_info>,
   8939                                            AVX512AIi8Base, EVEX_4V,
   8940                                            EVEX_CD8<32, CD8VT1>;
   8941 
        // f64 variant; identical shape, opcode 0x0B plus VEX_W.
   8942 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
   8943                                            SchedWriteFRnd.Scl, f64x_info>,
   8944                                            VEX_W, AVX512AIi8Base, EVEX_4V,
   8945                                            EVEX_CD8<64, CD8VT1>;
   8946 
   8947 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
   8948                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
   8949                                 dag OutMask, Predicate BasePredicate> {
   8950   let Predicates = [BasePredicate] in {
          // Masked scalar op merging into the existing destination element
          // -> masked ("k") intrinsic instruction form.
   8951     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
   8952                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
   8953                (extractelt _.VT:$dst, (iPTR 0))))),
   8954               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
   8955                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
   8956 
          // Masked scalar op with a zero fallback -> zero-masked ("kz") form.
   8957     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
   8958                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
   8959                ZeroFP))),
   8960               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
   8961                OutMask, _.VT:$src2, _.VT:$src1)>;
   8962   }
   8963 }
   8964 
   8965 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
   8966                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
   8967                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
        // Same masked-sqrt patterns for the f64 (sd) flavor.
   8968 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
   8969                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
   8970                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
   8971 
   8972 multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
   8973                                     X86VectorVTInfo _, PatLeaf ZeroFP,
   8974                                     bits<8> ImmV, Predicate BasePredicate> {
   8975   let Predicates = [BasePredicate] in {
          // As avx512_masked_scalar, but the target instruction takes a fixed
          // immediate (ImmV) — e.g. the rndscale rounding mode. Merge-masked:
   8976     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
   8977                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
   8978                (extractelt _.VT:$dst, (iPTR 0))))),
   8979               (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
   8980                _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
   8981 
          // Zero-masked:
   8982     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
   8983                (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
   8984               (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
   8985                VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
   8986   }
   8987 }
   8988 
   8989 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
   8990                                 v4f32x_info, fp32imm0, 0x01, HasAVX512>;
   8991 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
   8992                                 v4f32x_info, fp32imm0, 0x02, HasAVX512>;
        // f64 (sd) flavors; imm 0x01 selects floor, 0x02 selects ceil.
   8993 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
   8994                                 v2f64x_info, fp64imm0, 0x01, HasAVX512>;
   8995 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
   8996                                 v2f64x_info, fp64imm0, 0x02,  HasAVX512>;
   8997 
   8998 
   8999 //-------------------------------------------------
   9000 // Integer truncate and extend operations
   9001 //-------------------------------------------------
   9002 
   9003 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
   9004                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
   9005                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
          // reg-reg truncate; note the destination is in the ModRM r/m field
          // (MRMDestReg), matching the VPMOV* encoding.
   9006   let ExeDomain = DestInfo.ExeDomain in
   9007   defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
   9008                       (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
   9009                       (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
   9010                       EVEX, T8XS, Sched<[sched]>;
   9011 
          // Truncating stores (plain and merge-masked); no ISel patterns here —
          // avx512_trunc_mr_lowering supplies them.
   9012   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
   9013     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
   9014                (ins x86memop:$dst, SrcInfo.RC:$src),
   9015                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
   9016                EVEX, Sched<[sched.Folded]>;
   9017 
   9018     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
   9019                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
   9020                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
   9021                EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
   9022   }//mayStore = 1, hasSideEffects = 0
   9023 }
   9024 
   9025 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
   9026                                     X86VectorVTInfo DestInfo,
   9027                                     PatFrag truncFrag, PatFrag mtruncFrag,
   9028                                     string Name> {
   9029 
          // Select a truncating store to the pattern-less "mr" def above.
   9030   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
   9031             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
   9032                                     addr:$dst, SrcInfo.RC:$src)>;
   9033 
          // Masked truncating store -> the EVEX_K "mrk" def.
   9034   def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
   9035                                                (SrcInfo.VT SrcInfo.RC:$src)),
   9036             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
   9037                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
   9038 }
   9039 
// Instantiates one truncate instruction at all three vector widths
// (Z128/Z256 under HasVLX, Z under prd). A separate SDNode may be supplied
// per width because the narrow forms often need an "in-vector" node whose
// result only partially fills the destination register.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  // 128/256-bit forms require the VLX extension in addition to prd.
  let Predicates = [HasVLX, prd] in {
    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                             truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                             truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                             truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
   9066 
// Qword -> byte truncation (VPMOVQB family). The destination VT is v16i8 at
// every width, so even the 512-bit form fills at most 8 byte lanes and all
// three widths use the in-vector node. CD8VO: memory operand is 1/8 of the
// vector width (i16/i32/i64 for 128/256/512-bit sources).
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
   9075 
// Qword -> word truncation (VPMOVQW family). Only the 512-bit form
// (v8i64 -> v8i16) fills the full destination and can use the generic
// OpNode; narrower forms use the in-vector node.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
   9084 
// Qword -> dword truncation (VPMOVQD family). The 256- and 512-bit results
// fill the destination, so they use OpNode; only the 128-bit form
// (v2i64 -> 2 lanes of v4i32) needs the in-vector node.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
   9093 
// Dword -> byte truncation (VPMOVDB family). Only the 512-bit form
// (v16i32 -> v16i8) fills the destination; narrower widths use the
// in-vector node.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
   9102 
// Dword -> word truncation (VPMOVDW family). The 256- and 512-bit results
// fill the destination; only the 128-bit form uses the in-vector node.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
   9111 
// Word -> byte truncation (VPMOVWB family). Requires BWI. The 256- and
// 512-bit results fill the destination; only the 128-bit form uses the
// in-vector node.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
   9120 
// Instantiate the full VPMOV truncate family. Each source/dest size pair has
// three variants: plain truncation (trunc, with X86vtrunc as the in-vector
// node), signed saturation (X86vtruncs) and unsigned saturation
// (X86vtruncus), each paired with matching (masked) truncating-store frags.

// Qword -> byte.
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

// Qword -> word.
defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

// Qword -> dword.
defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   trunc, WriteShuffle256,
                                  truncstorevi32, masked_truncstorevi32, X86vtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32>;

// Dword -> byte.
defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

// Dword -> word.
defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

// Word -> byte (BWI only).
defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
   9162 
// Without VLX the 128/256-bit VPMOV forms are unavailable, so truncate a
// 256-bit source by widening it into a 512-bit register (upper half
// undefined via IMPLICIT_DEF), using the 512-bit instruction, and extracting
// the low 128 bits of the result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}
   9173 
// Same widen-truncate-extract trick for word -> byte, which needs BWI for
// the 512-bit VPMOVWB instruction.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
   9179 
// Common skeleton for a VPMOVSX/VPMOVZX extend instruction at one width:
// a maskable reg-reg form and a maskable load-folding form. The memory form
// matches LdFrag directly (an extending-load fragment) rather than
// OpNode-of-load.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}
   9195 
// Byte -> word extension (VPMOV[SZ]XBW) at all widths. Requires BWI; the
// 128-bit form consumes only the low half of the source register, hence
// InVecNode. ExtTy ("s"/"z") selects the sign/zero extending-load fragment.
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
   9214 
// Byte -> dword extension (VPMOV[SZ]XBD) at all widths. The 128-bit form
// reads only the low quarter of the byte source, hence InVecNode.
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
   9233 
// Byte -> qword extension (VPMOV[SZ]XBQ) at all widths. The 128-bit form
// reads only two source bytes (i16mem), hence InVecNode.
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
   9252 
// Word -> dword extension (VPMOV[SZ]XWD) at all widths. The 128-bit form
// consumes only the low half of the word source, hence InVecNode.
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
   9271 
// Word -> qword extension (VPMOV[SZ]XWQ) at all widths. The 128-bit form
// reads only two source words (i32mem), hence InVecNode.
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
   9290 
// Dword -> qword extension (VPMOV[SZ]XDQ) at all widths. The 128-bit form
// consumes only the low half of the dword source, hence InVecNode. No
// VEX_WIG here, unlike the byte/word-source variants above.
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
   9310 
// Instantiate all zero-extend (VPMOVZX*, X86vzext / zext_invec) and
// sign-extend (VPMOVSX*, X86vsext / sext_invec) variants for every
// source/dest element-size pair.
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
   9324 
   9325 
// Extra load-folding patterns for VPMOVSX/VPMOVZX: match extends of partial
// vector loads (scalar_to_vector of a narrow load, vzmovl/vzload zero-fill
// forms, and bitcasts of full loads) onto the memory form of the
// appropriately-sized instruction. InVecOp is matched when the extend
// consumes only part of the source vector; ExtOp when it consumes all of it.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> {
  // 128-bit patterns
  // BW needs BWI in addition to VLX.
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  // BQ reads only 2 bytes; extloadi32i16 loads them with an anyext to i32.
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

  def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
   9466 
// Apply the load-folding patterns to both the sign- and zero-extend families.
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
   9469 
   9470 //===----------------------------------------------------------------------===//
   9471 // GATHER - SCATTER Operations
   9472 
   9473 // FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction. The destination is early-clobber (it must
// not alias the index register) and is tied to $src1 (merge source for
// masked-off elements); the mask is tied to $mask_wb because hardware
// clears mask bits as elements complete.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                     vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
   9488 
// Gathers of 64-bit elements (PD / Q forms): both dword-indexed (D) and
// qword-indexed (Q) variants at 512/256/128 bits. Element count of indices
// equals element count of data, so data VT and instruction width match.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
   9506 
// Gathers of 32-bit elements (PS / D forms). For the qword-indexed variants
// the index vector is twice as wide as the data, so the data VT is one step
// narrower than the instruction width (e.g. the EVEX_V512 Q form gathers 8
// i64-indexed f32s into a 256-bit result, _.info256), and the 128-bit Q
// form gathers only 2 elements, hence the explicit VK2WM mask class.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}
   9525 
   9526 
// FP gathers (VGATHERDPD/QPD/DPS/QPS) and integer gathers
// (VPGATHERDQ/QQ/DD/QD).
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
   9532 
// One masked scatter instruction. Mirrors avx512_gather: the mask is tied
// to a writeback operand ($mask_wb) because hardware clears mask bits as
// elements are stored.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask,  vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}
   9548 
// Instantiate all scatter forms for 64-bit data elements (the "PD"/"Q"
// flavors): ZMM/YMM/XMM data with both dword (dopc) and qword (qopc) indices.
// The 256/128-bit forms additionally require VLX. All forms carry VEX_W for
// the 64-bit element size.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
   9566 
// Instantiate all scatter forms for 32-bit data elements (the "PS"/"D"
// flavors). Note the qword-index variants write fewer elements than the index
// vector width allows, so they pair a wider index operand with a narrower
// data info (e.g. _.info256 + vz256mem, _.info128 + vy128xmem). The smallest
// form (2 x i64 indices, 2 x 32-bit data) passes VK2WM explicitly because the
// data type's own mask class (VK4WM for info128) would be too wide.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mscatterv2i64, VK2WM>,
                                          EVEX_V128;
}
}
   9585 
// Floating-point scatters: VSCATTERDPD/QPD and VSCATTERDPS/QPS (0xA2/0xA3).
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

// Integer scatters: VPSCATTERDQ/QQ and VPSCATTERDD/QD (0xA0/0xA1).
defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
   9591 
// Gather/scatter prefetch (AVX-512 PF feature). These take a mask and a
// vector-index memory operand but produce no result register and have no
// DAG pattern; they are hint instructions only. Both mayLoad and mayStore
// are set so neither reads nor writes are reordered around them.
//   F - the Format (MRMxm) selecting the ModRM.reg opcode extension that
//       distinguishes the individual prefetch variants.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                       RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
}
   9600 
// Gather prefetches: opcode 0xC6 for dword indices, 0xC7 for qword indices.
// PF0 variants use /1 (MRM1m), PF1 variants use /2 (MRM2m). PS forms use a
// 16- or 8-bit mask per the element count; PD forms carry VEX_W.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetches: same opcode/index split, with PF0 on /5 (MRM5m) and
// PF1 on /6 (MRM6m).
defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
   9648 
// Mask-to-vector move (VPMOVM2*): sign-extend each mask bit into a full
// vector element (all-ones for set bits, zero for clear bits).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}
   9655 
// Instantiate the mask-to-vector move for all three vector widths; the
// 256/128-bit forms additionally require VLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
   9666 
// VPMOVM2{B,W} require BWI; VPMOVM2{D,Q} require DQI. W/Q forms carry VEX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
   9671 
// Vector-to-mask move (VPMOV*2M): set each mask bit when the corresponding
// element is negative, matched as (0 > x) via X86pcmpgtm — i.e. the mask
// receives the elements' sign bits.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}
   9678 
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens the source into a 512-bit register via INSERT_SUBREG (upper lanes
// undefined), runs the Zrr instruction, then narrows the resulting mask with
// COPY_TO_REGCLASS. The extra mask bits produced from the undefined upper
// lanes are dropped by the narrower mask class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
   9691 
// Instantiate vector-to-mask moves for all widths: real instructions for
// 512-bit (and 256/128 when VLX is present), plus widening patterns that
// reuse the 512-bit instruction when VLX is absent.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                               EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}
   9709 
// VPMOV{B,W}2M require BWI; VPMOV{D,Q}2M require DQI. W/Q forms carry VEX_W.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
   9718 
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Implemented as: widen the mask to v16i32 with VPMOVM2D, then truncate the
// i32 elements down to i8/i16 with VPMOVDB/VPMOVDW.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
   9728 
   9729 //===----------------------------------------------------------------------===//
   9730 // AVX-512 - COMPRESS and EXPAND
   9731 //
   9732 
   9733 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
   9734                                  string OpcodeStr, X86FoldableSchedWrite sched> {
   9735   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
   9736               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
   9737               (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
   9738               Sched<[sched]>;
   9739 
   9740   let mayStore = 1, hasSideEffects = 0 in
   9741   def mr : AVX5128I<opc, MRMDestMem, (outs),
   9742               (ins _.MemOp:$dst, _.RC:$src),
   9743               OpcodeStr # "\t{$src, $dst|$dst, $src}",
   9744               []>, EVEX_CD8<_.EltSize, CD8VT1>,
   9745               Sched<[sched.Folded]>;
   9746 
   9747   def mrk : AVX5128I<opc, MRMDestMem, (outs),
   9748               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
   9749               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
   9750               []>,
   9751               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
   9752               Sched<[sched.Folded]>;
   9753 }
   9754 
// Select the masked compressing-store node onto the mrk (mask-store) form.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
                                               (_.VT _.RC:$src)),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
   9761 
// Instantiate compress instructions + store patterns for all three widths;
// 256/128-bit forms additionally require VLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
   9777 
// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer compress (0x8B) and FP compress (0x8A); Q/PD forms carry VEX_W.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
   9787 
// expand
// One vector width of VEXPAND/VPEXPAND: masked register form (rr) and masked
// load form (rm), both matching X86expand.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1)))))>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, ReadAfterLd]>;
}
   9803 
// Select the masked expanding-load node onto the rm forms:
//   - undef or zero pass-through value -> zero-masked load (rmkz);
//   - register pass-through value      -> merge-masked load (rmk).
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}
   9819 
// Instantiate expand instructions + load patterns for all three widths;
// 256/128-bit forms additionally require VLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
   9835 
// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer expand (0x89) and FP expand (0x88); Q/PD forms carry VEX_W.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
   9845 
// Handle unary FP instructions with an immediate operand:
//   reg_vec1 = op(reg_vec, imm)
//   reg_vec1 = op(mem_vec, imm)
//   reg_vec1 = op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT (current rounding mode);
// the SAE form is added separately by avx512_unary_fp_sae_packed_imm.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 imm:$src2))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   9873 
// Handle the unary {sae} form: reg_vec1 = op(reg_vec2, imm), {sae}.
// (The original comment claimed a second vector source; this multiclass
// takes only one.) The OpNode receives FROUND_NO_EXC to suppress exceptions.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
   9888 
// Instantiate a unary FP-imm op at all widths. The 512-bit form also gets
// the {sae} variant (SAE is only encodable on 512-bit / LIG); 128/256-bit
// forms require VLX.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}
   9905 
// Handle two-source FP instructions with an immediate operand:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT (current rounding mode).
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   9937 
// Handle three-operand instructions with an i8 immediate where the source
// and destination vector types may differ:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   9961 
// Handle three-operand instructions with an i8 immediate where source and
// destination types are the same; extends avx512_3Op_rm_imm8 with the
// broadcast-memory form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i8 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
   9979 
// Handle scalar FP instructions with an immediate operand:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_scalar, imm)
// The memory form loads a single scalar element (scalar_to_vector of
// ScalarLdFrag) rather than a full vector.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (scalar_to_vector
                                      (_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   10002 
// Handle the packed two-source {sae} form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// The OpNode receives FROUND_NO_EXC to suppress exceptions.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
   10018 
// Handle the scalar two-source {sae} form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// The OpNode receives FROUND_NO_EXC to suppress exceptions. NOTE(review):
// unlike the packed variant above, this uses "defm NAME#rrib" — keep the
// explicit NAME prefix; generated record names depend on it.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
   10033 
// Instantiate a two-source FP-imm op at all widths. Only the 512-bit form
// gets the {sae} variant; 128/256-bit forms require VLX.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}
   10050 
// Instantiate a reg/mem 3-operand imm8 op (distinct src/dst types) at all
// widths; defaults to the BWI predicate. 128/256-bit forms require VLX.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
   10065 
// Instantiate a 3-operand imm8 op (same src/dst type, including broadcast
// form) at all widths. 128/256-bit forms require VLX.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                                EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}
   10080 
// Instantiate a scalar FP imm8 instruction together with its SAE
// (suppress-all-exceptions) variant.  Scalar ops only exist at XMM width,
// so only sched.XMM is used and only a single "Z" defm is produced.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
  }
}
   10089 
// Instantiate both the packed-single (PS) and packed-double (PD) forms of a
// unary FP imm8 instruction with an SAE variant.  PS and PD may use distinct
// opcodes (opcPs/opcPd); the PD form additionally carries VEX_W and a 64-bit
// CD8 memory-displacement scale.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
   10100 
// Packed unary FP imm8 instructions with SAE variants.
// VREDUCE requires DQI; VRNDSCALE/VGETMANT are base AVX-512.
// VRNDSCALE is the only one whose PS (0x08) and PD (0x09) opcodes differ.
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
   10110 
// Packed VRANGE (AVX-512 DQ): two-source FP imm8 instruction with SAE form.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Scalar VRANGE (AVX-512 DQ); VEX_LIG because scalar ops ignore vector length.
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar VREDUCE (AVX-512 DQ).
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar VGETMANT (base AVX-512).
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
   10140 
   10141 
// Lower the generic FP rounding ISD nodes to VRNDSCALE with a fixed
// immediate, for every operand form: register, load, broadcast load, each
// optionally merge-masked or zero-masked (vselect against $dst or
// ImmAllZerosV).  Immediate encoding (Intel SDM, VRNDSCALEPS/PD):
// imm8[1:0] = rounding control (0=nearest, 1=down, 2=up, 3=truncate),
// imm8[2]   = use MXCSR rounding mode instead of imm8[1:0],
// imm8[3]   = suppress precision exceptions,
// imm8[7:4] = number of fraction bits to keep (0 here: round to integer).
// Hence: ffloor->0x9, fceil->0xA, ftrunc->0xB, fnearbyint->0xC (MXCSR mode,
// exceptions suppressed), frint->0x4 (MXCSR mode, exceptions enabled).
multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
  // Register
  def : Pat<(_.VT (ffloor _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xB))>;

  // Merge-masking
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;

  // Zero-masking
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;

  // Load
  def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xB))>;

  // Merge-masking + load
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;

  // Zero-masking + load
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xB))>;

  // Broadcast load
  def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xB))>;

  // Merge-masking + broadcast load
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;

  // Zero-masking + broadcast load
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                           _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
}
   10331 
// Instantiate the VRNDSCALE lowering patterns: 512-bit forms need base
// AVX-512; the 128/256-bit forms need VLX.
let Predicates = [HasAVX512] in {
  defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
  defm : AVX512_rndscale_lowering<v8f64_info,  "PD">;
}

let Predicates = [HasVLX] in {
  defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
  defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
}
   10343 
// One width of VSHUFF/VSHUFI (shuffle of 128-bit lanes).  The X86Shuf128
// node is matched at CastInfo's type and bitconverted to the instruction's
// type _, so e.g. 32-bit-element forms can reuse a 64-bit-element shuffle
// node.  EVEX2VEXOvrd names the VEX instruction (VPERM2F128/VPERM2I128
// prefix) used when the EVEX->VEX compression pass rewrites this one.
// Three forms: rri (reg), rmi (full-vector load), rmbi (broadcast load).
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 imm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (bitconvert (_.LdFrag addr:$src2)),
                                           (i8 imm:$src3)))))>,
                Sched<[sched.Folded, ReadAfterLd]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                   (i8 imm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   10380 
// VSHUFF/VSHUFI at 512- and 256-bit widths.  There is no 128-bit form (a
// single 128-bit lane has nothing to shuffle).  Only the 256-bit version
// gets the EVEX->VEX override name: the 512-bit version has no VEX
// counterpart, so it passes "".
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}
   10394 
// 128-bit-lane shuffles.  The 32-bit-element forms cast through the
// 64-bit-element info so all four share the same X86Shuf128 node type;
// EVEX->VEX compression maps them onto VPERM2F128/VPERM2I128.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
   10403 
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
// Each pattern widens the 128-bit source into a ZMM register via
// INSERT_SUBREG into IMPLICIT_DEF, then uses a lane shuffle with imm 0 to
// replicate lane 0 into all four 128-bit lanes.  Lane shuffles are
// element-type agnostic, so the i16/i8 cases reuse VSHUFI32X4.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
   10436 
// One width of VALIGND/VALIGNQ: concatenate src2:src1 and extract a vector
// starting at element offset imm8.  Forms: rri (reg), rmi (load),
// rmbi (broadcast load).  The EVEX->VEX overrides point the compression
// pass at the corresponding VPALIGNR encodings.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr##", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                              (i8 imm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   10466 
// Instantiate VALIGND/VALIGNQ at all three widths.  The EVEX->VEX override
// set in avx512_valign is only correct for the 128-bit form, so it is
// cleared (set back to ?) for the 256-bit instantiation.
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
   10482 
// Element-granular align instructions: VALIGND (32-bit elements),
// VALIGNQ (64-bit elements, VEX_W), and byte-granular VPALIGNR (BWI,
// per-128-bit-lane).
defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
   10492 
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
// Each transform rescales the element-offset immediate to the target's
// element granularity: one 64-bit element is two 32-bit elements (x2) or
// eight bytes (x8); one 32-bit element is four bytes (x4).
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
   10504 
// Rewrite a masked align whose mask element size matches To (not From) as
// the To-typed instruction with the immediate rescaled via ImmXForm.  The
// align is matched at From's type, bitconverted to To's type, and selected
// under a To-sized writemask.  Covers merge- and zero-masking, for both
// register and full-vector-load second operands.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;
}
   10546 
// Extends avx512_vpalign_mask_lowering with broadcast-load patterns: the
// second operand is a To-element broadcast, folded into the rmbi forms.
// NOTE(review): the From-typed operands are emitted through To.RC directly
// (e.g. To.RC:$src1 for a From.RC match) — valid because the instantiations
// below pair types sharing one register class (v8i64/v16i32 -> VR512, etc.).
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked broadcast-load form.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (X86VBroadcast
                                                (To.ScalarLdFrag addr:$src2)))),
                             imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Merge-masked broadcast-load form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;

  // Zero-masked broadcast-load form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm imm:$src3))>;
}
   10583 
   10584 let Predicates = [HasAVX512] in {
   10585   // For 512-bit we lower to the widest element type we can. So we only need
   10586   // to handle converting valignq to valignd.
   10587   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
   10588                                          v16i32_info, ValignqImm32XForm>;
   10589 }
   10590 
// Same valignq->valignd masked re-selection for the 128- and 256-bit (VLX)
// instruction variants.
let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}
   10601 
// Masked VALIGND/VALIGNQ re-selected as byte-granularity VPALIGNR, with the
// element-count immediate rescaled to a byte count.
// NOTE(review): the comment below mentions 256-bit, but only VPALIGNRZ128
// patterns are instantiated here — confirm whether Z256 variants are
// intentionally omitted.
let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}
   10609 
// VDBPSADBW — double block packed sum-absolute-differences: i8 sources,
// i16 destination. CD8 scale is keyed to the 8-bit source elements.
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
   10613 
// Maskable unary operation with register (rr) and full-vector memory (rm)
// forms. The memory form folds the load and uses the folded scheduling class.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  // Register-register form.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  // Register-memory form: full-width vector load, bitcast to the op's VT.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}
   10631 
// As avx512_unary_rm, but additionally provides the embedded-broadcast
// (rmb, EVEX.b) form that loads a single scalar and splats it.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}
   10644 
// Instantiates avx512_unary_rm at 512/256/128-bit widths; the sub-512-bit
// variants additionally require VLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                              EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                              EVEX_V128;
  }
}
   10659 
// Instantiates avx512_unary_rmb (rr/rm + broadcast form) at 512/256/128-bit
// widths; the sub-512-bit variants additionally require VLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                              EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                 EVEX_V128;
  }
}
   10674 
// D (i32) and Q (i64) flavors of a unary op; the Q form carries VEX_W.
// Element broadcasts are only supported at dword/qword granularity, hence
// the rmb-capable multiclass here (vs. avx512_unary_rm_vl for B/W below).
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}
   10683 
// B (i8) and W (i16) flavors of a unary op; no broadcast forms exist for
// byte/word elements, so the plain rm multiclass is used.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}
   10692 
// All four element sizes of a unary op: D/Q gated on AVX512, B/W on BWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}
   10702 
// VPABSB/W/D/Q — packed absolute value for all element sizes.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;
   10705 
// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Without VLX only the ZMM form of VPABSQ exists, so widen the source into
// a ZMM register (upper lanes undefined via IMPLICIT_DEF), run the 512-bit
// instruction, and extract the original-width result.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}
   10719 
// Use 512bit version to implement 128/256 bit.
// Generic form of the widening trick above: when VLX is unavailable, select
// the Zrr (512-bit) instruction for 256/128-bit unary ops by inserting the
// source into an undefined ZMM value and extracting the subregister result.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
              _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
              _.info128.SubRegIdx)>;
  }
}
   10741 
// CDI instructions: VPLZCNTD/Q (count leading zeros) and VPCONFLICTD/Q.
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
   10752 
   10753 //===---------------------------------------------------------------------===//
   10754 // Counts number of ones - VPOPCNTD and VPOPCNTQ
   10755 //===---------------------------------------------------------------------===//
   10756 
// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// Without VLX, widen 128/256-bit popcount to the 512-bit instruction.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
   10763 
   10764 //===---------------------------------------------------------------------===//
   10765 // Replicate Single FP - MOVSHDUP and MOVSLDUP
   10766 //===---------------------------------------------------------------------===//
   10767 
// Wrapper that applies the f32 VT info and the XS prefix shared by
// VMOVSHDUP/VMOVSLDUP.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                      avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;
   10778 
   10779 //===----------------------------------------------------------------------===//
   10780 // AVX-512 - MOVDDUP
   10781 //===----------------------------------------------------------------------===//
   10782 
// 128-bit MOVDDUP needs its own memory form: it loads only a single f64
// (scalar_to_vector of the scalar load) rather than a full 128-bit vector,
// hence the CD8VH (half-vector) disp8 scale.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (OpNode (_.VT (scalar_to_vector
                                       (_.ScalarLdFrag addr:$src)))))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}
   10798 
// MOVDDUP at all widths. The 128-bit form selects on X86VBroadcast (a
// 2x duplicate of one f64) via the special multiclass above.
// NOTE(review): the OpNode parameter is unused — the defms hard-code
// X86Movddup / X86VBroadcast; confirm this is intentional.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
   10811 
// Wrapper applying the f64 VT info and the XD/VEX_W encoding bits.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                        avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
   10819 
// Select 128-bit VMOVDDUP for v2f64 broadcasts (a splat of one f64 is
// exactly what movddup computes), including masked (k) and zero-masked
// (kz) vselect forms for both register and memory sources.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;

// Masked register-source broadcasts (merge and zero masking).
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

// Masked scalar-load broadcasts.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

// Masked vector-load broadcasts (movddup only reads the low element).
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
   10850 
   10851 //===----------------------------------------------------------------------===//
   10852 // AVX-512 - Unpack Instructions
   10853 //===----------------------------------------------------------------------===//
   10854 
// FP unpack high/low.
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

// Integer unpack: byte/word forms require BWI.
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

// Dword/qword forms only need base AVX512.
defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;
   10877 
   10878 //===----------------------------------------------------------------------===//
   10879 // AVX-512 - Extract & Insert Integer Instructions
   10880 //===----------------------------------------------------------------------===//
   10881 
// Memory-destination form of byte/word element extract: the extracted
// element (produced as i32 by OpNode) is truncated to the element type
// and stored.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                       addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
   10891 
// VPEXTRB: register form (result zero-extended into a GPR) plus the
// memory-destination form from avx512_extract_elt_bw_m. BWI required.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
   10904 
// VPEXTRW: the legacy 0xC5 reg-reg encoding, a codegen-only rr_REV record
// for the alternate 0x15 reg-reg encoding (kept so the disassembler can
// round-trip it; no patterns), and the memory-destination form. BWI required.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
   10924 
// VPEXTRD/VPEXTRQ: extract a dword/qword element to a GPR or to memory.
// Selected directly from the generic extractelt node. DQI required.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                                            RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}
   10944 
// EVEX-encoded element extracts. D and Q share the same opcode,
// distinguished by VEX_W.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
   10949 
// Memory-source form of element insert: loads the scalar via LdFrag and
// inserts it at the immediate-selected position.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
   10959 
// VPINSRB/VPINSRW: insert a byte/word from a GPR (rr) or from memory (rm).
// BWI required.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
   10973 
// VPINSRD/VPINSRQ: insert a dword/qword from a GPR or memory, selected
// from the generic insertelt node. DQI required.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}
   10988 
// EVEX-encoded element inserts. VPINSRD and VPINSRQ intentionally share
// opcode 0x22 (66 0F 3A 22), distinguished only by VEX_W.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
   10995 
   10996 //===----------------------------------------------------------------------===//
   10997 // VSHUFPS - VSHUFPD Operations
   10998 //===----------------------------------------------------------------------===//
   10999 
// VSHUFPS/VSHUFPD: three-operand FP shuffle with an 8-bit control
// immediate. VTInfo_I supplies the integer VT whose element size drives
// the CD8 scale; the FP VT info drives instruction selection.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
   11010 
   11011 //===----------------------------------------------------------------------===//
   11012 // AVX-512 - Byte shift Left/Right
   11013 //===----------------------------------------------------------------------===//
   11014 
// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
// Whole-vector byte shift by an immediate (VPSLLDQ/VPSRLDQ), register and
// memory-source forms. Note: no masking — these use plain AVX512 defs,
// not AVX512_maskable.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 imm:$src2))))]>,
           Sched<[sched.Folded, ReadAfterLd]>;
}
   11032 
// Byte-shift instantiated at 512/256/128-bit widths (byte-element VT info).
// Both VPSLLDQ and VPSRLDQ use opcode 0x73, distinguished by the ModRM
// reg field (MRM7r/m vs MRM3r/m).
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
   11052 
// PSADBW-style op: byte-element sources (_src) producing qword-element
// results (_dst), so the destination VT differs from the source VT.
// No masking forms.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                (OpNode (_src.VT _src.RC:$src1),
                                        (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                              (OpNode (_src.VT _src.RC:$src1),
                              (_src.VT (bitconvert
                                        (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, ReadAfterLd]>;
}
   11072 
// VPSADBW at all widths: i8 inputs, i64 outputs at each vector size.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
   11089 
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// Each VPTERNLOG imm bit index encodes (op0,op1,op2) input bits as
// 4*op0 + 2*op1 + op2, so permuting operands permutes the imm bits.
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  // (Bits 2/4 and 3/5 differ only in the op0/op1 input bits.)
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end
  // (cyclic rotation 123 -> 231 of the three operands).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning
  // (cyclic rotation 123 -> 312; inverse of the 231 transform above).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
   11151 
// vpternlog: three-source bitwise ternary logic, with the operation chosen
// by an 8-bit truth-table immediate ($src4).  Because the operation is fully
// described by the immediate, operand order can be normalized by rewriting
// the immediate (via the VPTERNLOG*_imm8 transforms above).  The extra
// patterns below use those transforms so that loads/broadcasts always fold
// into the $src3 slot and the masked passthru value always lands in $src1
// (which is tied to $dst).
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  // Register-register-register form.
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  // Form with a full-vector load folded into $src3.
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  // Form with a broadcast scalar load folded into $src3 (EVEX_B).
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
   11320 
// Instantiate vpternlog at all three vector widths.  The 512-bit form only
// requires AVX512F; the 128/256-bit forms additionally require VLX.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                               _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}
   11333 
// vpternlogd/vpternlogq: the i32- and i64-element instantiations.  The q
// form carries VEX_W to select 64-bit element encoding.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;
   11338 
   11339 // Patterns to implement vnot using vpternlog instead of creating all ones
   11340 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
   11341 // so that the result is only dependent on src0. But we use the same source
   11342 // for all operands to prevent a false dependency.
   11343 // TODO: We should maybe have a more generalized algorithm for folding to
   11344 // vpternlog.
let Predicates = [HasAVX512] in {
  // 512-bit NOT: xor with all-ones becomes vpternlogq with imm 15 (~A),
  // repeating $src in every operand slot to avoid a false dependency.
  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
   11349 
let Predicates = [HasVX512, NoVLX] in {
  // Without VLX only the 512-bit vpternlog exists, so widen the 128/256-bit
  // NOT to 512 bits via INSERT_SUBREG, then extract the low subregister.
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}
   11366 
let Predicates = [HasVLX] in {
  // With VLX the 128/256-bit NOT maps directly to the narrow vpternlogq.
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
   11373 
   11374 //===----------------------------------------------------------------------===//
   11375 // AVX-512 - FixupImm
   11376 //===----------------------------------------------------------------------===//
   11377 
// Packed vfixupimm forms: register, full-vector memory, and broadcast.
// $src3 is the integer fix-up table (type TblVT, distinct from the FP
// vector type _); all forms use the current rounding mode (FROUND_CURRENT).
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                      Sched<[sched.Folded, ReadAfterLd]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                    EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
}
   11411 
// Packed vfixupimm {sae} (suppress-all-exceptions) register form; uses
// FROUND_NO_EXC and EVEX_B to encode the SAE semantics.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
  }
}
   11428 
   11429 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
   11430                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
   11431                                   X86VectorVTInfo _src3VT> {
   11432   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
   11433       ExeDomain = _.ExeDomain in {
   11434     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   11435                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
   11436                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
   11437                       (OpNode (_.VT _.RC:$src1),
   11438                               (_.VT _.RC:$src2),
   11439                               (_src3VT.VT _src3VT.RC:$src3),
   11440                               (i32 imm:$src4),
   11441                               (i32 FROUND_CURRENT))>, Sched<[sched]>;
   11442     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
   11443                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
   11444                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
   11445                       "$src2, $src3, {sae}, $src4",
   11446                       (OpNode (_.VT _.RC:$src1),
   11447                               (_.VT _.RC:$src2),
   11448                               (_src3VT.VT _src3VT.RC:$src3),
   11449                               (i32 imm:$src4),
   11450                               (i32 FROUND_NO_EXC))>,
   11451                       EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
   11452     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
   11453                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
   11454                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
   11455                      (OpNode (_.VT _.RC:$src1),
   11456                              (_.VT _.RC:$src2),
   11457                              (_src3VT.VT (scalar_to_vector
   11458                                        (_src3VT.ScalarLdFrag addr:$src3))),
   11459                              (i32 imm:$src4),
   11460                              (i32 FROUND_CURRENT))>,
   11461                      Sched<[sched.Folded, ReadAfterLd]>;
   11462   }
   11463 }
   11464 
// Instantiate packed vfixupimm at all widths; the 512-bit form also gets the
// {sae} variant.  128/256-bit forms require VLX.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                       _Vec.info512, _Tbl.info512>,
                avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}
   11483 
// Scalar (SS/SD) and packed (PS/PD) vfixupimm instantiations.  The 64-bit
// element variants carry VEX_W.
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
   11494 
   11495 // Patterns used to select SSE scalar fp arithmetic instructions from
   11496 // either:
   11497 //
   11498 // (1) a scalar fp operation followed by a blend
   11499 //
   11500 // The effect is that the backend no longer emits unnecessary vector
   11501 // insert instructions immediately after SSE scalar fp instructions
   11502 // like addss or mulss.
   11503 //
   11504 // For example, given the following code:
   11505 //   __m128 foo(__m128 A, __m128 B) {
   11506 //     A[0] += B[0];
   11507 //     return A;
   11508 //   }
   11509 //
   11510 // Previously we generated:
   11511 //   addss %xmm0, %xmm1
   11512 //   movss %xmm1, %xmm0
   11513 //
   11514 // We now generate:
   11515 //   addss %xmm1, %xmm0
   11516 //
   11517 // (2) a vector packed single/double fp operation followed by a vector insert
   11518 //
   11519 // The effect is that the backend converts the packed fp instruction
   11520 // followed by a vector insert into a single SSE scalar fp instruction.
   11521 //
   11522 // For example, given the following code:
   11523 //   __m128 foo(__m128 A, __m128 B) {
   11524 //     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
   11526 //   }
   11527 //
   11528 // Previously we generated:
   11529 //   addps %xmm0, %xmm1
   11530 //   movss %xmm1, %xmm0
   11531 //
   11532 // We now generate:
   11533 //   addss %xmm1, %xmm0
   11534 
   11535 // TODO: Some canonicalization in lowering would simplify the number of
   11536 // patterns we have to try to match.
   11537 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
   11538                                            X86VectorVTInfo _, PatLeaf ZeroFP> {
   11539   let Predicates = [HasAVX512] in {
   11540     // extracted scalar math op with insert via movss
   11541     def : Pat<(MoveNode
   11542                (_.VT VR128X:$dst),
   11543                (_.VT (scalar_to_vector
   11544                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
   11545                           _.FRC:$src)))),
   11546               (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
   11547                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
   11548 
   11549     // extracted masked scalar math op with insert via movss
   11550     def : Pat<(MoveNode (_.VT VR128X:$src1),
   11551                (scalar_to_vector
   11552                 (X86selects VK1WM:$mask,
   11553                             (Op (_.EltVT
   11554                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
   11555                                 _.FRC:$src2),
   11556                             _.FRC:$src0))),
   11557               (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
   11558                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
   11559                VK1WM:$mask, _.VT:$src1,
   11560                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
   11561 
   11562     // extracted masked scalar math op with insert via movss
   11563     def : Pat<(MoveNode (_.VT VR128X:$src1),
   11564                (scalar_to_vector
   11565                 (X86selects VK1WM:$mask,
   11566                             (Op (_.EltVT
   11567                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
   11568                                 _.FRC:$src2), (_.EltVT ZeroFP)))),
   11569       (!cast<I>("V"#OpcPrefix#Zrr_Intkz) 
   11570           VK1WM:$mask, _.VT:$src1,
   11571           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
   11572   }
   11573 }
   11574 
// Instantiate the scalar-math patterns for the four basic FP operations,
// in single precision (movss) and double precision (movsd).
defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
   11584 
// Select the EVEX scalar unary-math instruction (e.g. vsqrtss) from a unary
// op on an extracted element followed by a movss/movsd-style insert.
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}
   11593 
// Scalar square root in single and double precision.
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
   11596 
// Same as AVX512_scalar_unary_math_patterns, but for instructions that take
// an extra immediate (ImmV) selecting the operation, e.g. vrndscaless.
multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
                                                 SDNode Move, X86VectorVTInfo _,
                                                 bits<8> ImmV> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
                                                        (i32 ImmV))>;
  }
}
   11607 
// floor/ceil via vrndscaless/sd: immediate 0x01 rounds toward -inf (floor),
// 0x02 rounds toward +inf (ceil).
defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x02>;
defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x02>;
   11616 
   11617 //===----------------------------------------------------------------------===//
   11618 // AES instructions
   11619 //===----------------------------------------------------------------------===//
   11620 
// EVEX-encoded AES ops at all three widths, reusing AESI_binop_rm_int with
// the width-specific intrinsic (IntPrefix, IntPrefix_256, IntPrefix_512).
// 128/256-bit forms need VLX+VAES; the 512-bit form needs AVX512F+VAES.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
    }
    let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
   11638 
// The four VAES operations: encrypt/decrypt round and last-round.
defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
   11643 
   11644 //===----------------------------------------------------------------------===//
   11645 // PCLMUL instructions - Carry less multiplication
   11646 //===----------------------------------------------------------------------===//
   11647 
// EVEX-encoded carry-less multiply: 512-bit needs AVX512F+VPCLMULQDQ,
// 128/256-bit need VLX+VPCLMULQDQ.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}
   11660 
// Assembler aliases (via vpclmulqdq_aliases) for each EVEX width.
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
   11665 
   11666 //===----------------------------------------------------------------------===//
   11667 // VBMI2
   11668 //===----------------------------------------------------------------------===//
   11669 
// VBMI2 variable-shift style 3-source op: register (r) and full-vector
// memory (m) forms.  $src1 is tied to $dst and carries the passthru value.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
   11688 
// Extends VBMI2_shift_var_rm with an embedded-broadcast memory form ("mb",
// EVEX_B): a single scalar element is loaded and splat across the vector.
// Used only for the 32/64-bit element variants below; the 16-bit ones use the
// plain _rm multiclass (EVEX embedded broadcast has no word granularity).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}
   11703 
// Instantiates the non-broadcast variable-shift forms at all three vector
// widths: 512-bit under HasVBMI2 alone, 128/256-bit only with VLX as well.
// Each width picks the matching scheduler class from `sched`.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                   EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                   EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                   EVEX_V128;
  }
}
   11716 
// Same as VBMI2_shift_var_rm_common but instantiates the broadcast-capable
// multiclass (adds the "mb" embedded-broadcast form at each width).
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                    EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                    EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                    EVEX_V128;
  }
}
// Top-level driver for a variable concat-shift family: the word form has its
// own opcode (wOp, no broadcast); the dword and qword forms share dqOp and are
// distinguished by VEX_W, and both get the embedded-broadcast variants.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
   11738 
// Immediate-count variants of the concat-shift family. Mirrors
// VBMI2_shift_var: wOp for words (no broadcast), dqOp shared by dword/qword
// (VEX_W selects qword). The W form goes through a different helper
// multiclass than D/Q; the latter add the 0F3A imm8 base (AVX512AIi8Base).
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
   11749 
// Concat & Shift: VPSHLD/VPSHRD concatenate two sources and shift by an
// immediate; the *V forms take a per-element variable shift count and
// accumulate into the destination (hence the _var multiclass with tied $src1).
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
   11755 
// Compress: VBMI2 byte/word variants of VPCOMPRESS. Both share opcode 0x63
// and are distinguished by VEX_W. NotMemoryFoldable keeps them out of the
// auto-generated load/store folding tables.
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand: the inverse operation, opcode 0x62, same VEX_W-based W/B split.
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
   11768 
   11769 //===----------------------------------------------------------------------===//
   11770 // VNNI
   11771 //===----------------------------------------------------------------------===//
   11772 
// One VNNI dot-product-accumulate instruction in reg-reg ("r"), reg-mem ("m")
// and embedded-broadcast ("mb") forms. $src1 is tied to $dst: the destination
// acts as the accumulator. All memory forms use 32-bit granularity
// (EVEX_CD8<32, CD8VF>) -- every instantiation below uses an i32 vector info.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3))>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (bitconvert
                                                     (VTI.LdFrag addr:$src3)))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast form: load one dword and splat it as the third source.
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                   "$src2, ${src3}"##VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (X86VBroadcast
                                             (VTI.ScalarLdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, ReadAfterLd]>;
}
   11800 
// Instantiates a VNNI op at all three widths over i32 lanes: 512-bit under
// HasVNNI alone, 128/256-bit only when VLX is also available.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
  }
}
   11810 
// VNNI multiply-accumulate instructions: BUSD = unsigned bytes x signed
// bytes accumulated into dwords, WSSD = signed words x signed words; the
// trailing S variants saturate.
// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
   11816 
   11817 //===----------------------------------------------------------------------===//
   11818 // Bit Algorithms
   11819 //===----------------------------------------------------------------------===//
   11820 
// BITALG per-byte/per-word population count. Both variants share opcode 0x54
// and are distinguished by VEX_W; selection maps the generic ctpop node.
// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

// Extra ctpop lowering patterns (multiclass defined elsewhere; presumably
// covers selection cases the plain defs above miss -- verify at its
// definition).
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
   11829 
// VPSHUFBITQMB at one width: unlike the other ops in this section it writes a
// mask register (outs VTI.KRC:$dst), so it goes through AVX512_maskable_cmp.
// Opcode 0x8F in the T8PD map; reg-reg ("rr") and reg-mem ("rm") forms.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, ReadAfterLd]>;
}
   11847 
// Instantiates VPSHUFBITQMB at all widths: 512-bit under HasBITALG alone,
// 128/256-bit only when VLX is also available.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}
   11856 
// Bit-shuffle-to-mask over byte vectors.
// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
   11859 
   11860 //===----------------------------------------------------------------------===//
   11861 // GFNI
   11862 //===----------------------------------------------------------------------===//
   11863 
// EVEX-encoded GF(2^8) multiply at all three widths. Note the extra HasBWI
// requirement alongside GFNI (presumably because byte-granular write-masking
// needs the BWI mask instructions -- confirm). The trailing `1` marks the
// operation commutable for the folding tables.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                                EVEX_V128;
  }
}
   11876 
// Galois-field byte multiply. Opcode 0xCF in the T8PD (0F38) map -- the same
// opcode value as VGF2P8AFFINEINVQB below, which lives in the 0F3A imm8 map.
defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;
   11880 
// Inherits the reg-reg and reg-mem imm8 forms from avx512_3Op_rm_imm8 and
// adds the embedded-broadcast form ("rmbi"). The broadcast element is a
// quadword (the affine matrix operand) even though the data type is bytes,
// which is why a separate BcstVTI (an i64 vector info) is threaded through
// and the loaded splat is bitconverted back to VTI.VT.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, ReadAfterLd]>;
}
   11895 
// Instantiates a GF2P8AFFINE op at all three widths, pairing each byte-vector
// info with the matching qword-vector info for the broadcast operand.
// Predicates mirror GF2P8MULB_avx512_common (GFNI + BWI, VLX for sub-512).
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}
   11908 
// Affine transform (0xCE) and affine transform of the inverse (0xCF), both in
// the 0F3A imm8 map (AVX512AIi8Base) with qword attribute (VEX_W) since the
// second operand is an 8x8 bit matrix held in each quadword.
defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
   11915 
   11916 
   11917 //===----------------------------------------------------------------------===//
   11918 // AVX5124FMAPS
   11919 //===----------------------------------------------------------------------===//
   11920 
// AVX512_4FMAPS four-iteration FMA instructions. These are memory-only
// (mayLoad = 1) with empty patterns ([]), so they exist here for the
// assembler/disassembler; instruction selection presumably happens elsewhere
// (e.g. via intrinsics) -- confirm before relying on isel. The $src2 register
// operand is printed normally; per the ISA the hardware consumes a block of
// four consecutive registers starting at $src2 (the "4" in the mnemonic) --
// that detail is not expressed in these records. The memory operand is
// 128 bits (f128mem) with quarter-vector disp8 compression (CD8VQ).
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

// Scalar forms operate on XMM registers with full disp8 compression (CD8VF).
defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}
   11947 
   11948 //===----------------------------------------------------------------------===//
   11949 // AVX5124VNNIW
   11950 //===----------------------------------------------------------------------===//
   11951 
// AVX512_4VNNIW four-iteration word dot-product instructions. Same scheme as
// the 4FMAPS block above: memory-only, assembler-level records with empty
// patterns (selection presumably via intrinsics elsewhere -- confirm), and a
// 128-bit memory operand with CD8VQ disp8 compression.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}
   11966 
   11967