; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

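; These tests exercise the llvm.x86.avx512.mask.andn.ps, and.ps and or.ps
; intrinsics with register, memory and broadcast operands, in unmasked,
; merge-masked ({%k1}) and zero-masked ({%k1} {z}) forms.
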
define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x08]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x08]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x08]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x08]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <16 x float> @test_mask_andnot_ps_rr_512(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandnps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x08]
; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x0f]
; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rmb_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmb_512:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x08]
; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x0f]
; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandnps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_and_ps_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x08]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x08]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_and_ps_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x08]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_and_ps_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x08]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_ps_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vandps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_ps_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

    800 define <16 x float> @test_mask_and_ps_rr_512(<16 x float> %a, <16 x float> %b) {
    801 ; CHECK-LABEL: test_mask_and_ps_rr_512:
    802 ; CHECK:       # %bb.0:
    803 ; CHECK-NEXT:    vandps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0xc1]
    804 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    805   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
    806   ret <16 x float> %res
    807 }
    808 
    809 define <16 x float> @test_mask_and_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
    810 ; X86-LABEL: test_mask_and_ps_rrk_512:
    811 ; X86:       # %bb.0:
    812 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
    813 ; X86-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
    814 ; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
    815 ; X86-NEXT:    retl # encoding: [0xc3]
    816 ;
    817 ; X64-LABEL: test_mask_and_ps_rrk_512:
    818 ; X64:       # %bb.0:
    819 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    820 ; X64-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
    821 ; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
    822 ; X64-NEXT:    retq # encoding: [0xc3]
    823   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
    824   ret <16 x float> %res
    825 }
    826 
    827 define <16 x float> @test_mask_and_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
    828 ; X86-LABEL: test_mask_and_ps_rrkz_512:
    829 ; X86:       # %bb.0:
    830 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
    831 ; X86-NEXT:    vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
    832 ; X86-NEXT:    retl # encoding: [0xc3]
    833 ;
    834 ; X64-LABEL: test_mask_and_ps_rrkz_512:
    835 ; X64:       # %bb.0:
    836 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    837 ; X64-NEXT:    vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
    838 ; X64-NEXT:    retq # encoding: [0xc3]
    839   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
    840   ret <16 x float> %res
    841 }
    842 
    843 define <16 x float> @test_mask_and_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
    844 ; X86-LABEL: test_mask_and_ps_rm_512:
    845 ; X86:       # %bb.0:
    846 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    847 ; X86-NEXT:    vandps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x00]
    848 ; X86-NEXT:    retl # encoding: [0xc3]
    849 ;
    850 ; X64-LABEL: test_mask_and_ps_rm_512:
    851 ; X64:       # %bb.0:
    852 ; X64-NEXT:    vandps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x07]
    853 ; X64-NEXT:    retq # encoding: [0xc3]
    854   %b = load <16 x float>, <16 x float>* %ptr_b
    855   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
    856   ret <16 x float> %res
    857 }
    858 
    859 define <16 x float> @test_mask_and_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
    860 ; X86-LABEL: test_mask_and_ps_rmk_512:
    861 ; X86:       # %bb.0:
    862 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    863 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
    864 ; X86-NEXT:    vandps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x08]
    865 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    866 ; X86-NEXT:    retl # encoding: [0xc3]
    867 ;
    868 ; X64-LABEL: test_mask_and_ps_rmk_512:
    869 ; X64:       # %bb.0:
    870 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
    871 ; X64-NEXT:    vandps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x0f]
    872 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    873 ; X64-NEXT:    retq # encoding: [0xc3]
    874   %b = load <16 x float>, <16 x float>* %ptr_b
    875   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
    876   ret <16 x float> %res
    877 }
    878 
    879 define <16 x float> @test_mask_and_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
    880 ; X86-LABEL: test_mask_and_ps_rmkz_512:
    881 ; X86:       # %bb.0:
    882 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    883 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
    884 ; X86-NEXT:    vandps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x00]
    885 ; X86-NEXT:    retl # encoding: [0xc3]
    886 ;
    887 ; X64-LABEL: test_mask_and_ps_rmkz_512:
    888 ; X64:       # %bb.0:
    889 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
    890 ; X64-NEXT:    vandps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x07]
    891 ; X64-NEXT:    retq # encoding: [0xc3]
    892   %b = load <16 x float>, <16 x float>* %ptr_b
    893   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
    894   ret <16 x float> %res
    895 }
    896 
    897 define <16 x float> @test_mask_and_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
    898 ; X86-LABEL: test_mask_and_ps_rmb_512:
    899 ; X86:       # %bb.0:
    900 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    901 ; X86-NEXT:    vandps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x00]
    902 ; X86-NEXT:    retl # encoding: [0xc3]
    903 ;
    904 ; X64-LABEL: test_mask_and_ps_rmb_512:
    905 ; X64:       # %bb.0:
    906 ; X64-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x07]
    907 ; X64-NEXT:    retq # encoding: [0xc3]
    908   %q = load float, float* %ptr_b
    909   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
    910   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
    911   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
    912   ret <16 x float> %res
    913 }
    914 
    915 define <16 x float> @test_mask_and_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
    916 ; X86-LABEL: test_mask_and_ps_rmbk_512:
    917 ; X86:       # %bb.0:
    918 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    919 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
    920 ; X86-NEXT:    vandps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x08]
    921 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    922 ; X86-NEXT:    retl # encoding: [0xc3]
    923 ;
    924 ; X64-LABEL: test_mask_and_ps_rmbk_512:
    925 ; X64:       # %bb.0:
    926 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
    927 ; X64-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x0f]
    928 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    929 ; X64-NEXT:    retq # encoding: [0xc3]
    930   %q = load float, float* %ptr_b
    931   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
    932   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
    933   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
    934   ret <16 x float> %res
    935 }
    936 
    937 define <16 x float> @test_mask_and_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
    938 ; X86-LABEL: test_mask_and_ps_rmbkz_512:
    939 ; X86:       # %bb.0:
    940 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    941 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
    942 ; X86-NEXT:    vandps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x00]
    943 ; X86-NEXT:    retl # encoding: [0xc3]
    944 ;
    945 ; X64-LABEL: test_mask_and_ps_rmbkz_512:
    946 ; X64:       # %bb.0:
    947 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
    948 ; X64-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x07]
    949 ; X64-NEXT:    retq # encoding: [0xc3]
    950   %q = load float, float* %ptr_b
    951   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
    952   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
    953   %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
    954   ret <16 x float> %res
    955 }
    956 
    957 declare <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
    958 
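; The or.ps tests below follow the same suffix convention as the tests above:
; rr = register/register, rm = full-width memory operand, rmb = broadcast
; scalar memory operand; a trailing k merges into %passThru under %mask, and
; kz uses zero-masking instead.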
    959 define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) {
    960 ; CHECK-LABEL: test_mask_or_ps_rr_128:
    961 ; CHECK:       # %bb.0:
    962 ; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
    963 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    964   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
    965   ret <4 x float> %res
    966 }
    967 
    968 define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
    969 ; X86-LABEL: test_mask_or_ps_rrk_128:
    970 ; X86:       # %bb.0:
    971 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
    972 ; X86-NEXT:    vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
    973 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
    974 ; X86-NEXT:    retl # encoding: [0xc3]
    975 ;
    976 ; X64-LABEL: test_mask_or_ps_rrk_128:
    977 ; X64:       # %bb.0:
    978 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    979 ; X64-NEXT:    vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
    980 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
    981 ; X64-NEXT:    retq # encoding: [0xc3]
    982   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
    983   ret <4 x float> %res
    984 }
    985 
    986 define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
    987 ; X86-LABEL: test_mask_or_ps_rrkz_128:
    988 ; X86:       # %bb.0:
    989 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
    990 ; X86-NEXT:    vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
    991 ; X86-NEXT:    retl # encoding: [0xc3]
    992 ;
    993 ; X64-LABEL: test_mask_or_ps_rrkz_128:
    994 ; X64:       # %bb.0:
    995 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    996 ; X64-NEXT:    vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
    997 ; X64-NEXT:    retq # encoding: [0xc3]
    998   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
    999   ret <4 x float> %res
   1000 }
   1001 
   1002 define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
   1003 ; X86-LABEL: test_mask_or_ps_rm_128:
   1004 ; X86:       # %bb.0:
   1005 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1006 ; X86-NEXT:    vorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x00]
   1007 ; X86-NEXT:    retl # encoding: [0xc3]
   1008 ;
   1009 ; X64-LABEL: test_mask_or_ps_rm_128:
   1010 ; X64:       # %bb.0:
   1011 ; X64-NEXT:    vorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x07]
   1012 ; X64-NEXT:    retq # encoding: [0xc3]
   1013   %b = load <4 x float>, <4 x float>* %ptr_b
   1014   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
   1015   ret <4 x float> %res
   1016 }
   1017 
   1018 define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
   1019 ; X86-LABEL: test_mask_or_ps_rmk_128:
   1020 ; X86:       # %bb.0:
   1021 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1022 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1023 ; X86-NEXT:    vorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x08]
   1024 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   1025 ; X86-NEXT:    retl # encoding: [0xc3]
   1026 ;
   1027 ; X64-LABEL: test_mask_or_ps_rmk_128:
   1028 ; X64:       # %bb.0:
   1029 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1030 ; X64-NEXT:    vorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f]
   1031 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   1032 ; X64-NEXT:    retq # encoding: [0xc3]
   1033   %b = load <4 x float>, <4 x float>* %ptr_b
   1034   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
   1035   ret <4 x float> %res
   1036 }
   1037 
   1038 define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
   1039 ; X86-LABEL: test_mask_or_ps_rmkz_128:
   1040 ; X86:       # %bb.0:
   1041 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1042 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1043 ; X86-NEXT:    vorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x00]
   1044 ; X86-NEXT:    retl # encoding: [0xc3]
   1045 ;
   1046 ; X64-LABEL: test_mask_or_ps_rmkz_128:
   1047 ; X64:       # %bb.0:
   1048 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1049 ; X64-NEXT:    vorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07]
   1050 ; X64-NEXT:    retq # encoding: [0xc3]
   1051   %b = load <4 x float>, <4 x float>* %ptr_b
   1052   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
   1053   ret <4 x float> %res
   1054 }
   1055 
   1056 define <4 x float> @test_mask_or_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
   1057 ; X86-LABEL: test_mask_or_ps_rmb_128:
   1058 ; X86:       # %bb.0:
   1059 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1060 ; X86-NEXT:    vorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x00]
   1061 ; X86-NEXT:    retl # encoding: [0xc3]
   1062 ;
   1063 ; X64-LABEL: test_mask_or_ps_rmb_128:
   1064 ; X64:       # %bb.0:
   1065 ; X64-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x07]
   1066 ; X64-NEXT:    retq # encoding: [0xc3]
   1067   %q = load float, float* %ptr_b
   1068   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1069   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   1070   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
   1071   ret <4 x float> %res
   1072 }
   1073 
   1074 define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
   1075 ; X86-LABEL: test_mask_or_ps_rmbk_128:
   1076 ; X86:       # %bb.0:
   1077 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1078 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1079 ; X86-NEXT:    vorps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x08]
   1080 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   1081 ; X86-NEXT:    retl # encoding: [0xc3]
   1082 ;
   1083 ; X64-LABEL: test_mask_or_ps_rmbk_128:
   1084 ; X64:       # %bb.0:
   1085 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1086 ; X64-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f]
   1087 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   1088 ; X64-NEXT:    retq # encoding: [0xc3]
   1089   %q = load float, float* %ptr_b
   1090   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1091   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   1092   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
   1093   ret <4 x float> %res
   1094 }
   1095 
   1096 define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
   1097 ; X86-LABEL: test_mask_or_ps_rmbkz_128:
   1098 ; X86:       # %bb.0:
   1099 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1100 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1101 ; X86-NEXT:    vorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x00]
   1102 ; X86-NEXT:    retl # encoding: [0xc3]
   1103 ;
   1104 ; X64-LABEL: test_mask_or_ps_rmbkz_128:
   1105 ; X64:       # %bb.0:
   1106 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1107 ; X64-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07]
   1108 ; X64-NEXT:    retq # encoding: [0xc3]
   1109   %q = load float, float* %ptr_b
   1110   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1111   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   1112   %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
   1113   ret <4 x float> %res
   1114 }
   1115 
   1116 declare <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1117 
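; As in the 128-bit cases, only the unmasked 128-/256-bit forms are expected
; to compress to the shorter VEX encoding ("EVEX TO VEX Compression"); masked,
; broadcast and 512-bit forms keep the EVEX prefix.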
   1118 define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) {
   1119 ; CHECK-LABEL: test_mask_or_ps_rr_256:
   1120 ; CHECK:       # %bb.0:
   1121 ; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0xc1]
   1122 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1123   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1124   ret <8 x float> %res
   1125 }
   1126 
   1127 define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
   1128 ; X86-LABEL: test_mask_or_ps_rrk_256:
   1129 ; X86:       # %bb.0:
   1130 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   1131 ; X86-NEXT:    vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
   1132 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
   1133 ; X86-NEXT:    retl # encoding: [0xc3]
   1134 ;
   1135 ; X64-LABEL: test_mask_or_ps_rrk_256:
   1136 ; X64:       # %bb.0:
   1137 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1138 ; X64-NEXT:    vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
   1139 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
   1140 ; X64-NEXT:    retq # encoding: [0xc3]
   1141   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1142   ret <8 x float> %res
   1143 }
   1144 
   1145 define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
   1146 ; X86-LABEL: test_mask_or_ps_rrkz_256:
   1147 ; X86:       # %bb.0:
   1148 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   1149 ; X86-NEXT:    vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
   1150 ; X86-NEXT:    retl # encoding: [0xc3]
   1151 ;
   1152 ; X64-LABEL: test_mask_or_ps_rrkz_256:
   1153 ; X64:       # %bb.0:
   1154 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1155 ; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
   1156 ; X64-NEXT:    retq # encoding: [0xc3]
   1157   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1158   ret <8 x float> %res
   1159 }
   1160 
   1161 define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
   1162 ; X86-LABEL: test_mask_or_ps_rm_256:
   1163 ; X86:       # %bb.0:
   1164 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1165 ; X86-NEXT:    vorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x00]
   1166 ; X86-NEXT:    retl # encoding: [0xc3]
   1167 ;
   1168 ; X64-LABEL: test_mask_or_ps_rm_256:
   1169 ; X64:       # %bb.0:
   1170 ; X64-NEXT:    vorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x07]
   1171 ; X64-NEXT:    retq # encoding: [0xc3]
   1172   %b = load <8 x float>, <8 x float>* %ptr_b
   1173   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1174   ret <8 x float> %res
   1175 }
   1176 
   1177 define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
   1178 ; X86-LABEL: test_mask_or_ps_rmk_256:
   1179 ; X86:       # %bb.0:
   1180 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1181 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1182 ; X86-NEXT:    vorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x08]
   1183 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
   1184 ; X86-NEXT:    retl # encoding: [0xc3]
   1185 ;
   1186 ; X64-LABEL: test_mask_or_ps_rmk_256:
   1187 ; X64:       # %bb.0:
   1188 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1189 ; X64-NEXT:    vorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f]
   1190 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
   1191 ; X64-NEXT:    retq # encoding: [0xc3]
   1192   %b = load <8 x float>, <8 x float>* %ptr_b
   1193   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1194   ret <8 x float> %res
   1195 }
   1196 
   1197 define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
   1198 ; X86-LABEL: test_mask_or_ps_rmkz_256:
   1199 ; X86:       # %bb.0:
   1200 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1201 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1202 ; X86-NEXT:    vorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x00]
   1203 ; X86-NEXT:    retl # encoding: [0xc3]
   1204 ;
   1205 ; X64-LABEL: test_mask_or_ps_rmkz_256:
   1206 ; X64:       # %bb.0:
   1207 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1208 ; X64-NEXT:    vorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07]
   1209 ; X64-NEXT:    retq # encoding: [0xc3]
   1210   %b = load <8 x float>, <8 x float>* %ptr_b
   1211   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1212   ret <8 x float> %res
   1213 }
   1214 
   1215 define <8 x float> @test_mask_or_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
   1216 ; X86-LABEL: test_mask_or_ps_rmb_256:
   1217 ; X86:       # %bb.0:
   1218 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1219 ; X86-NEXT:    vorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x00]
   1220 ; X86-NEXT:    retl # encoding: [0xc3]
   1221 ;
   1222 ; X64-LABEL: test_mask_or_ps_rmb_256:
   1223 ; X64:       # %bb.0:
   1224 ; X64-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x07]
   1225 ; X64-NEXT:    retq # encoding: [0xc3]
   1226   %q = load float, float* %ptr_b
   1227   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1228   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1229   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1230   ret <8 x float> %res
   1231 }
   1232 
   1233 define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
   1234 ; X86-LABEL: test_mask_or_ps_rmbk_256:
   1235 ; X86:       # %bb.0:
   1236 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1237 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1238 ; X86-NEXT:    vorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x08]
   1239 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
   1240 ; X86-NEXT:    retl # encoding: [0xc3]
   1241 ;
   1242 ; X64-LABEL: test_mask_or_ps_rmbk_256:
   1243 ; X64:       # %bb.0:
   1244 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1245 ; X64-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f]
   1246 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
   1247 ; X64-NEXT:    retq # encoding: [0xc3]
   1248   %q = load float, float* %ptr_b
   1249   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1250   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1251   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1252   ret <8 x float> %res
   1253 }
   1254 
   1255 define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
   1256 ; X86-LABEL: test_mask_or_ps_rmbkz_256:
   1257 ; X86:       # %bb.0:
   1258 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1259 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1260 ; X86-NEXT:    vorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x00]
   1261 ; X86-NEXT:    retl # encoding: [0xc3]
   1262 ;
   1263 ; X64-LABEL: test_mask_or_ps_rmbkz_256:
   1264 ; X64:       # %bb.0:
   1265 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1266 ; X64-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07]
   1267 ; X64-NEXT:    retq # encoding: [0xc3]
   1268   %q = load float, float* %ptr_b
   1269   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1270   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1271   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1272   ret <8 x float> %res
   1273 }
   1274 
   1275 declare <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1276 
   1277 define <16 x float> @test_mask_or_ps_rr_512(<16 x float> %a, <16 x float> %b) {
   1278 ; CHECK-LABEL: test_mask_or_ps_rr_512:
   1279 ; CHECK:       # %bb.0:
   1280 ; CHECK-NEXT:    vorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0xc1]
   1281 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1282   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1283   ret <16 x float> %res
   1284 }
   1285 
   1286 define <16 x float> @test_mask_or_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
   1287 ; X86-LABEL: test_mask_or_ps_rrk_512:
   1288 ; X86:       # %bb.0:
   1289 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1290 ; X86-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
   1291 ; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   1292 ; X86-NEXT:    retl # encoding: [0xc3]
   1293 ;
   1294 ; X64-LABEL: test_mask_or_ps_rrk_512:
   1295 ; X64:       # %bb.0:
   1296 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1297 ; X64-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
   1298 ; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   1299 ; X64-NEXT:    retq # encoding: [0xc3]
   1300   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1301   ret <16 x float> %res
   1302 }
   1303 
   1304 define <16 x float> @test_mask_or_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
   1305 ; X86-LABEL: test_mask_or_ps_rrkz_512:
   1306 ; X86:       # %bb.0:
   1307 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1308 ; X86-NEXT:    vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
   1309 ; X86-NEXT:    retl # encoding: [0xc3]
   1310 ;
   1311 ; X64-LABEL: test_mask_or_ps_rrkz_512:
   1312 ; X64:       # %bb.0:
   1313 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1314 ; X64-NEXT:    vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
   1315 ; X64-NEXT:    retq # encoding: [0xc3]
   1316   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1317   ret <16 x float> %res
   1318 }
   1319 
   1320 define <16 x float> @test_mask_or_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
   1321 ; X86-LABEL: test_mask_or_ps_rm_512:
   1322 ; X86:       # %bb.0:
   1323 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1324 ; X86-NEXT:    vorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x00]
   1325 ; X86-NEXT:    retl # encoding: [0xc3]
   1326 ;
   1327 ; X64-LABEL: test_mask_or_ps_rm_512:
   1328 ; X64:       # %bb.0:
   1329 ; X64-NEXT:    vorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x07]
   1330 ; X64-NEXT:    retq # encoding: [0xc3]
   1331   %b = load <16 x float>, <16 x float>* %ptr_b
   1332   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1333   ret <16 x float> %res
   1334 }
   1335 
   1336 define <16 x float> @test_mask_or_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
   1337 ; X86-LABEL: test_mask_or_ps_rmk_512:
   1338 ; X86:       # %bb.0:
   1339 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1340 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1341 ; X86-NEXT:    vorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x08]
   1342 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1343 ; X86-NEXT:    retl # encoding: [0xc3]
   1344 ;
   1345 ; X64-LABEL: test_mask_or_ps_rmk_512:
   1346 ; X64:       # %bb.0:
   1347 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1348 ; X64-NEXT:    vorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x0f]
   1349 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1350 ; X64-NEXT:    retq # encoding: [0xc3]
   1351   %b = load <16 x float>, <16 x float>* %ptr_b
   1352   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1353   ret <16 x float> %res
   1354 }
   1355 
   1356 define <16 x float> @test_mask_or_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
   1357 ; X86-LABEL: test_mask_or_ps_rmkz_512:
   1358 ; X86:       # %bb.0:
   1359 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1360 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1361 ; X86-NEXT:    vorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x00]
   1362 ; X86-NEXT:    retl # encoding: [0xc3]
   1363 ;
   1364 ; X64-LABEL: test_mask_or_ps_rmkz_512:
   1365 ; X64:       # %bb.0:
   1366 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1367 ; X64-NEXT:    vorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x07]
   1368 ; X64-NEXT:    retq # encoding: [0xc3]
   1369   %b = load <16 x float>, <16 x float>* %ptr_b
   1370   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1371   ret <16 x float> %res
   1372 }
   1373 
   1374 define <16 x float> @test_mask_or_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
   1375 ; X86-LABEL: test_mask_or_ps_rmb_512:
   1376 ; X86:       # %bb.0:
   1377 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1378 ; X86-NEXT:    vorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x00]
   1379 ; X86-NEXT:    retl # encoding: [0xc3]
   1380 ;
   1381 ; X64-LABEL: test_mask_or_ps_rmb_512:
   1382 ; X64:       # %bb.0:
   1383 ; X64-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x07]
   1384 ; X64-NEXT:    retq # encoding: [0xc3]
   1385   %q = load float, float* %ptr_b
   1386   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1387   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1388   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1389   ret <16 x float> %res
   1390 }
   1391 
   1392 define <16 x float> @test_mask_or_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
   1393 ; X86-LABEL: test_mask_or_ps_rmbk_512:
   1394 ; X86:       # %bb.0:
   1395 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1396 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1397 ; X86-NEXT:    vorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x08]
   1398 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1399 ; X86-NEXT:    retl # encoding: [0xc3]
   1400 ;
   1401 ; X64-LABEL: test_mask_or_ps_rmbk_512:
   1402 ; X64:       # %bb.0:
   1403 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1404 ; X64-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x0f]
   1405 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1406 ; X64-NEXT:    retq # encoding: [0xc3]
   1407   %q = load float, float* %ptr_b
   1408   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1409   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1410   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1411   ret <16 x float> %res
   1412 }
   1413 
   1414 define <16 x float> @test_mask_or_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
   1415 ; X86-LABEL: test_mask_or_ps_rmbkz_512:
   1416 ; X86:       # %bb.0:
   1417 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1418 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1419 ; X86-NEXT:    vorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x00]
   1420 ; X86-NEXT:    retl # encoding: [0xc3]
   1421 ;
   1422 ; X64-LABEL: test_mask_or_ps_rmbkz_512:
   1423 ; X64:       # %bb.0:
   1424 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1425 ; X64-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x07]
   1426 ; X64-NEXT:    retq # encoding: [0xc3]
   1427   %q = load float, float* %ptr_b
   1428   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1429   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1430   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1431   ret <16 x float> %res
   1432 }
   1433 
   1434 declare <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
   1435 
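; The xor.ps tests mirror the or.ps tests above, checking VXORPS (opcode 0x57)
; in place of VORPS (0x56) across the same register, memory, broadcast and
; masking variants.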
   1436 define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) {
   1437 ; CHECK-LABEL: test_mask_xor_ps_rr_128:
   1438 ; CHECK:       # %bb.0:
   1439 ; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
   1440 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1441   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
   1442   ret <4 x float> %res
   1443 }
   1444 
   1445 define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
   1446 ; X86-LABEL: test_mask_xor_ps_rrk_128:
   1447 ; X86:       # %bb.0:
   1448 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   1449 ; X86-NEXT:    vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1]
   1450 ; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
   1451 ; X86-NEXT:    retl # encoding: [0xc3]
   1452 ;
   1453 ; X64-LABEL: test_mask_xor_ps_rrk_128:
   1454 ; X64:       # %bb.0:
   1455 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1456 ; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1]
   1457 ; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
   1458 ; X64-NEXT:    retq # encoding: [0xc3]
   1459   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
   1460   ret <4 x float> %res
   1461 }
   1462 
   1463 define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
   1464 ; X86-LABEL: test_mask_xor_ps_rrkz_128:
   1465 ; X86:       # %bb.0:
   1466 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   1467 ; X86-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1]
   1468 ; X86-NEXT:    retl # encoding: [0xc3]
   1469 ;
   1470 ; X64-LABEL: test_mask_xor_ps_rrkz_128:
   1471 ; X64:       # %bb.0:
   1472 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1473 ; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1]
   1474 ; X64-NEXT:    retq # encoding: [0xc3]
   1475   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
   1476   ret <4 x float> %res
   1477 }
   1478 
   1479 define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
   1480 ; X86-LABEL: test_mask_xor_ps_rm_128:
   1481 ; X86:       # %bb.0:
   1482 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1483 ; X86-NEXT:    vxorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x00]
   1484 ; X86-NEXT:    retl # encoding: [0xc3]
   1485 ;
   1486 ; X64-LABEL: test_mask_xor_ps_rm_128:
   1487 ; X64:       # %bb.0:
   1488 ; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x07]
   1489 ; X64-NEXT:    retq # encoding: [0xc3]
   1490   %b = load <4 x float>, <4 x float>* %ptr_b
   1491   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
   1492   ret <4 x float> %res
   1493 }
   1494 
   1495 define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
   1496 ; X86-LABEL: test_mask_xor_ps_rmk_128:
   1497 ; X86:       # %bb.0:
   1498 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1499 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1500 ; X86-NEXT:    vxorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x08]
   1501 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   1502 ; X86-NEXT:    retl # encoding: [0xc3]
   1503 ;
   1504 ; X64-LABEL: test_mask_xor_ps_rmk_128:
   1505 ; X64:       # %bb.0:
   1506 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1507 ; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f]
   1508 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   1509 ; X64-NEXT:    retq # encoding: [0xc3]
   1510   %b = load <4 x float>, <4 x float>* %ptr_b
   1511   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
   1512   ret <4 x float> %res
   1513 }
   1514 
   1515 define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
   1516 ; X86-LABEL: test_mask_xor_ps_rmkz_128:
   1517 ; X86:       # %bb.0:
   1518 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1519 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1520 ; X86-NEXT:    vxorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x00]
   1521 ; X86-NEXT:    retl # encoding: [0xc3]
   1522 ;
   1523 ; X64-LABEL: test_mask_xor_ps_rmkz_128:
   1524 ; X64:       # %bb.0:
   1525 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1526 ; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07]
   1527 ; X64-NEXT:    retq # encoding: [0xc3]
   1528   %b = load <4 x float>, <4 x float>* %ptr_b
   1529   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
   1530   ret <4 x float> %res
   1531 }
   1532 
   1533 define <4 x float> @test_mask_xor_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
   1534 ; X86-LABEL: test_mask_xor_ps_rmb_128:
   1535 ; X86:       # %bb.0:
   1536 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1537 ; X86-NEXT:    vxorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x00]
   1538 ; X86-NEXT:    retl # encoding: [0xc3]
   1539 ;
   1540 ; X64-LABEL: test_mask_xor_ps_rmb_128:
   1541 ; X64:       # %bb.0:
   1542 ; X64-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x07]
   1543 ; X64-NEXT:    retq # encoding: [0xc3]
   1544   %q = load float, float* %ptr_b
   1545   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1546   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   1547   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
   1548   ret <4 x float> %res
   1549 }
   1550 
   1551 define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
   1552 ; X86-LABEL: test_mask_xor_ps_rmbk_128:
   1553 ; X86:       # %bb.0:
   1554 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1555 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1556 ; X86-NEXT:    vxorps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x08]
   1557 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   1558 ; X86-NEXT:    retl # encoding: [0xc3]
   1559 ;
   1560 ; X64-LABEL: test_mask_xor_ps_rmbk_128:
   1561 ; X64:       # %bb.0:
   1562 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1563 ; X64-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f]
   1564 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   1565 ; X64-NEXT:    retq # encoding: [0xc3]
   1566   %q = load float, float* %ptr_b
   1567   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1568   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   1569   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
   1570   ret <4 x float> %res
   1571 }
   1572 
   1573 define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
   1574 ; X86-LABEL: test_mask_xor_ps_rmbkz_128:
   1575 ; X86:       # %bb.0:
   1576 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1577 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1578 ; X86-NEXT:    vxorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x00]
   1579 ; X86-NEXT:    retl # encoding: [0xc3]
   1580 ;
   1581 ; X64-LABEL: test_mask_xor_ps_rmbkz_128:
   1582 ; X64:       # %bb.0:
   1583 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1584 ; X64-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x07]
   1585 ; X64-NEXT:    retq # encoding: [0xc3]
   1586   %q = load float, float* %ptr_b
   1587   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1588   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   1589   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
   1590   ret <4 x float> %res
   1591 }
   1592 
   1593 declare <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1594 
   1595 define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) {
   1596 ; CHECK-LABEL: test_mask_xor_ps_rr_256:
   1597 ; CHECK:       # %bb.0:
   1598 ; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0xc1]
   1599 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1600   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1601   ret <8 x float> %res
   1602 }
   1603 
   1604 define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
   1605 ; X86-LABEL: test_mask_xor_ps_rrk_256:
   1606 ; X86:       # %bb.0:
   1607 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   1608 ; X86-NEXT:    vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1]
   1609 ; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
   1610 ; X86-NEXT:    retl # encoding: [0xc3]
   1611 ;
   1612 ; X64-LABEL: test_mask_xor_ps_rrk_256:
   1613 ; X64:       # %bb.0:
   1614 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1615 ; X64-NEXT:    vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1]
   1616 ; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
   1617 ; X64-NEXT:    retq # encoding: [0xc3]
   1618   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1619   ret <8 x float> %res
   1620 }
   1621 
   1622 define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
   1623 ; X86-LABEL: test_mask_xor_ps_rrkz_256:
   1624 ; X86:       # %bb.0:
   1625 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   1626 ; X86-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1]
   1627 ; X86-NEXT:    retl # encoding: [0xc3]
   1628 ;
   1629 ; X64-LABEL: test_mask_xor_ps_rrkz_256:
   1630 ; X64:       # %bb.0:
   1631 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1632 ; X64-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1]
   1633 ; X64-NEXT:    retq # encoding: [0xc3]
   1634   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1635   ret <8 x float> %res
   1636 }
   1637 
   1638 define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
   1639 ; X86-LABEL: test_mask_xor_ps_rm_256:
   1640 ; X86:       # %bb.0:
   1641 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1642 ; X86-NEXT:    vxorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x00]
   1643 ; X86-NEXT:    retl # encoding: [0xc3]
   1644 ;
   1645 ; X64-LABEL: test_mask_xor_ps_rm_256:
   1646 ; X64:       # %bb.0:
   1647 ; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x07]
   1648 ; X64-NEXT:    retq # encoding: [0xc3]
   1649   %b = load <8 x float>, <8 x float>* %ptr_b
   1650   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1651   ret <8 x float> %res
   1652 }
   1653 
   1654 define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
   1655 ; X86-LABEL: test_mask_xor_ps_rmk_256:
   1656 ; X86:       # %bb.0:
   1657 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1658 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1659 ; X86-NEXT:    vxorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x08]
   1660 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
   1661 ; X86-NEXT:    retl # encoding: [0xc3]
   1662 ;
   1663 ; X64-LABEL: test_mask_xor_ps_rmk_256:
   1664 ; X64:       # %bb.0:
   1665 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1666 ; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f]
   1667 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
   1668 ; X64-NEXT:    retq # encoding: [0xc3]
   1669   %b = load <8 x float>, <8 x float>* %ptr_b
   1670   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1671   ret <8 x float> %res
   1672 }
   1673 
   1674 define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
   1675 ; X86-LABEL: test_mask_xor_ps_rmkz_256:
   1676 ; X86:       # %bb.0:
   1677 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1678 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1679 ; X86-NEXT:    vxorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x00]
   1680 ; X86-NEXT:    retl # encoding: [0xc3]
   1681 ;
   1682 ; X64-LABEL: test_mask_xor_ps_rmkz_256:
   1683 ; X64:       # %bb.0:
   1684 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1685 ; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07]
   1686 ; X64-NEXT:    retq # encoding: [0xc3]
   1687   %b = load <8 x float>, <8 x float>* %ptr_b
   1688   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1689   ret <8 x float> %res
   1690 }
   1691 
   1692 define <8 x float> @test_mask_xor_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
   1693 ; X86-LABEL: test_mask_xor_ps_rmb_256:
   1694 ; X86:       # %bb.0:
   1695 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1696 ; X86-NEXT:    vxorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x00]
   1697 ; X86-NEXT:    retl # encoding: [0xc3]
   1698 ;
   1699 ; X64-LABEL: test_mask_xor_ps_rmb_256:
   1700 ; X64:       # %bb.0:
   1701 ; X64-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x07]
   1702 ; X64-NEXT:    retq # encoding: [0xc3]
   1703   %q = load float, float* %ptr_b
   1704   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1705   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1706   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1707   ret <8 x float> %res
   1708 }
   1709 
   1710 define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
   1711 ; X86-LABEL: test_mask_xor_ps_rmbk_256:
   1712 ; X86:       # %bb.0:
   1713 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1714 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1715 ; X86-NEXT:    vxorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x08]
   1716 ; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
   1717 ; X86-NEXT:    retl # encoding: [0xc3]
   1718 ;
   1719 ; X64-LABEL: test_mask_xor_ps_rmbk_256:
   1720 ; X64:       # %bb.0:
   1721 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1722 ; X64-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f]
   1723 ; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
   1724 ; X64-NEXT:    retq # encoding: [0xc3]
   1725   %q = load float, float* %ptr_b
   1726   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1727   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1728   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1729   ret <8 x float> %res
   1730 }
   1731 
   1732 define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
   1733 ; X86-LABEL: test_mask_xor_ps_rmbkz_256:
   1734 ; X86:       # %bb.0:
   1735 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1736 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1737 ; X86-NEXT:    vxorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x00]
   1738 ; X86-NEXT:    retl # encoding: [0xc3]
   1739 ;
   1740 ; X64-LABEL: test_mask_xor_ps_rmbkz_256:
   1741 ; X64:       # %bb.0:
   1742 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1743 ; X64-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07]
   1744 ; X64-NEXT:    retq # encoding: [0xc3]
   1745   %q = load float, float* %ptr_b
   1746   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1747   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1748   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1749   ret <8 x float> %res
   1750 }
   1751 
   1752 declare <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1753 
   1754 define <16 x float> @test_mask_xor_ps_rr_512(<16 x float> %a, <16 x float> %b) {
   1755 ; CHECK-LABEL: test_mask_xor_ps_rr_512:
   1756 ; CHECK:       # %bb.0:
   1757 ; CHECK-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0xc1]
   1758 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1759   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1760   ret <16 x float> %res
   1761 }
   1762 
   1763 define <16 x float> @test_mask_xor_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
   1764 ; X86-LABEL: test_mask_xor_ps_rrk_512:
   1765 ; X86:       # %bb.0:
   1766 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1767 ; X86-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1]
   1768 ; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   1769 ; X86-NEXT:    retl # encoding: [0xc3]
   1770 ;
   1771 ; X64-LABEL: test_mask_xor_ps_rrk_512:
   1772 ; X64:       # %bb.0:
   1773 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1774 ; X64-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1]
   1775 ; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   1776 ; X64-NEXT:    retq # encoding: [0xc3]
   1777   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1778   ret <16 x float> %res
   1779 }
   1780 
   1781 define <16 x float> @test_mask_xor_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
   1782 ; X86-LABEL: test_mask_xor_ps_rrkz_512:
   1783 ; X86:       # %bb.0:
   1784 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1785 ; X86-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1]
   1786 ; X86-NEXT:    retl # encoding: [0xc3]
   1787 ;
   1788 ; X64-LABEL: test_mask_xor_ps_rrkz_512:
   1789 ; X64:       # %bb.0:
   1790 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1791 ; X64-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1]
   1792 ; X64-NEXT:    retq # encoding: [0xc3]
   1793   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1794   ret <16 x float> %res
   1795 }
   1796 
   1797 define <16 x float> @test_mask_xor_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
   1798 ; X86-LABEL: test_mask_xor_ps_rm_512:
   1799 ; X86:       # %bb.0:
   1800 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1801 ; X86-NEXT:    vxorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x00]
   1802 ; X86-NEXT:    retl # encoding: [0xc3]
   1803 ;
   1804 ; X64-LABEL: test_mask_xor_ps_rm_512:
   1805 ; X64:       # %bb.0:
   1806 ; X64-NEXT:    vxorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x07]
   1807 ; X64-NEXT:    retq # encoding: [0xc3]
   1808   %b = load <16 x float>, <16 x float>* %ptr_b
   1809   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1810   ret <16 x float> %res
   1811 }
   1812 
   1813 define <16 x float> @test_mask_xor_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
   1814 ; X86-LABEL: test_mask_xor_ps_rmk_512:
   1815 ; X86:       # %bb.0:
   1816 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1817 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1818 ; X86-NEXT:    vxorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x08]
   1819 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1820 ; X86-NEXT:    retl # encoding: [0xc3]
   1821 ;
   1822 ; X64-LABEL: test_mask_xor_ps_rmk_512:
   1823 ; X64:       # %bb.0:
   1824 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1825 ; X64-NEXT:    vxorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x0f]
   1826 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1827 ; X64-NEXT:    retq # encoding: [0xc3]
   1828   %b = load <16 x float>, <16 x float>* %ptr_b
   1829   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1830   ret <16 x float> %res
   1831 }
   1832 
   1833 define <16 x float> @test_mask_xor_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
   1834 ; X86-LABEL: test_mask_xor_ps_rmkz_512:
   1835 ; X86:       # %bb.0:
   1836 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1837 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1838 ; X86-NEXT:    vxorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x00]
   1839 ; X86-NEXT:    retl # encoding: [0xc3]
   1840 ;
   1841 ; X64-LABEL: test_mask_xor_ps_rmkz_512:
   1842 ; X64:       # %bb.0:
   1843 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1844 ; X64-NEXT:    vxorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x07]
   1845 ; X64-NEXT:    retq # encoding: [0xc3]
   1846   %b = load <16 x float>, <16 x float>* %ptr_b
   1847   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1848   ret <16 x float> %res
   1849 }
   1850 
   1851 define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
   1852 ; X86-LABEL: test_mask_xor_ps_rmb_512:
   1853 ; X86:       # %bb.0:
   1854 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1855 ; X86-NEXT:    vxorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x00]
   1856 ; X86-NEXT:    retl # encoding: [0xc3]
   1857 ;
   1858 ; X64-LABEL: test_mask_xor_ps_rmb_512:
   1859 ; X64:       # %bb.0:
   1860 ; X64-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x07]
   1861 ; X64-NEXT:    retq # encoding: [0xc3]
   1862   %q = load float, float* %ptr_b
   1863   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1864   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1865   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1866   ret <16 x float> %res
   1867 }
   1868 
   1869 define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
   1870 ; X86-LABEL: test_mask_xor_ps_rmbk_512:
   1871 ; X86:       # %bb.0:
   1872 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1873 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1874 ; X86-NEXT:    vxorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x08]
   1875 ; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1876 ; X86-NEXT:    retl # encoding: [0xc3]
   1877 ;
   1878 ; X64-LABEL: test_mask_xor_ps_rmbk_512:
   1879 ; X64:       # %bb.0:
   1880 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1881 ; X64-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f]
   1882 ; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1883 ; X64-NEXT:    retq # encoding: [0xc3]
   1884   %q = load float, float* %ptr_b
   1885   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1886   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1887   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1888   ret <16 x float> %res
   1889 }
   1890 
   1891 define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
   1892 ; X86-LABEL: test_mask_xor_ps_rmbkz_512:
   1893 ; X86:       # %bb.0:
   1894 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1895 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1896 ; X86-NEXT:    vxorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x00]
   1897 ; X86-NEXT:    retl # encoding: [0xc3]
   1898 ;
   1899 ; X64-LABEL: test_mask_xor_ps_rmbkz_512:
   1900 ; X64:       # %bb.0:
   1901 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1902 ; X64-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07]
   1903 ; X64-NEXT:    retq # encoding: [0xc3]
   1904   %q = load float, float* %ptr_b
   1905   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1906   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1907   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1908   ret <16 x float> %res
   1909 }
   1910 
   1911 declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
   1912 
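; The llvm.x86.avx512.mask.pmull.q.512 tests below exercise VPMULLQ (64-bit low
; multiply, AVX512DQ) at 512 bits: register, memory and broadcast operands, each
; with unmasked, merge-masked (rrk/rmk/rmbk) and zero-masked (rrkz/rmkz/rmbkz) forms.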
   1913 define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
   1914 ; CHECK-LABEL: test_mask_mullo_epi64_rr_512:
   1915 ; CHECK:       # %bb.0:
   1916 ; CHECK-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
   1917 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1918   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   1919   ret <8 x i64> %res
   1920 }
   1921 
   1922 define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
   1923 ; X86-LABEL: test_mask_mullo_epi64_rrk_512:
   1924 ; X86:       # %bb.0:
   1925 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   1926 ; X86-NEXT:    vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
   1927 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1928 ; X86-NEXT:    retl # encoding: [0xc3]
   1929 ;
   1930 ; X64-LABEL: test_mask_mullo_epi64_rrk_512:
   1931 ; X64:       # %bb.0:
   1932 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1933 ; X64-NEXT:    vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
   1934 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1935 ; X64-NEXT:    retq # encoding: [0xc3]
   1936   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   1937   ret <8 x i64> %res
   1938 }
   1939 
   1940 define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
   1941 ; X86-LABEL: test_mask_mullo_epi64_rrkz_512:
   1942 ; X86:       # %bb.0:
   1943 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   1944 ; X86-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
   1945 ; X86-NEXT:    retl # encoding: [0xc3]
   1946 ;
   1947 ; X64-LABEL: test_mask_mullo_epi64_rrkz_512:
   1948 ; X64:       # %bb.0:
   1949 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   1950 ; X64-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
   1951 ; X64-NEXT:    retq # encoding: [0xc3]
   1952   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
   1953   ret <8 x i64> %res
   1954 }
   1955 
   1956 define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) {
   1957 ; X86-LABEL: test_mask_mullo_epi64_rm_512:
   1958 ; X86:       # %bb.0:
   1959 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1960 ; X86-NEXT:    vpmullq (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x00]
   1961 ; X86-NEXT:    retl # encoding: [0xc3]
   1962 ;
   1963 ; X64-LABEL: test_mask_mullo_epi64_rm_512:
   1964 ; X64:       # %bb.0:
   1965 ; X64-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07]
   1966 ; X64-NEXT:    retq # encoding: [0xc3]
   1967   %b = load <8 x i64>, <8 x i64>* %ptr_b
   1968   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   1969   ret <8 x i64> %res
   1970 }
   1971 
   1972 define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   1973 ; X86-LABEL: test_mask_mullo_epi64_rmk_512:
   1974 ; X86:       # %bb.0:
   1975 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1976 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1977 ; X86-NEXT:    vpmullq (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x08]
   1978 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1979 ; X86-NEXT:    retl # encoding: [0xc3]
   1980 ;
   1981 ; X64-LABEL: test_mask_mullo_epi64_rmk_512:
   1982 ; X64:       # %bb.0:
   1983 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1984 ; X64-NEXT:    vpmullq (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
   1985 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1986 ; X64-NEXT:    retq # encoding: [0xc3]
   1987   %b = load <8 x i64>, <8 x i64>* %ptr_b
   1988   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   1989   ret <8 x i64> %res
   1990 }
   1991 
   1992 define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
   1993 ; X86-LABEL: test_mask_mullo_epi64_rmkz_512:
   1994 ; X86:       # %bb.0:
   1995 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1996 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   1997 ; X86-NEXT:    vpmullq (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x00]
   1998 ; X86-NEXT:    retl # encoding: [0xc3]
   1999 ;
   2000 ; X64-LABEL: test_mask_mullo_epi64_rmkz_512:
   2001 ; X64:       # %bb.0:
   2002 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2003 ; X64-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07]
   2004 ; X64-NEXT:    retq # encoding: [0xc3]
   2005   %b = load <8 x i64>, <8 x i64>* %ptr_b
   2006   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
   2007   ret <8 x i64> %res
   2008 }
   2009 
   2010 define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) {
   2011 ; X86-LABEL: test_mask_mullo_epi64_rmb_512:
   2012 ; X86:       # %bb.0:
   2013 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2014 ; X86-NEXT:    vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   2015 ; X86-NEXT:    # xmm1 = mem[0],zero
   2016 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   2017 ; X86-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
   2018 ; X86-NEXT:    retl # encoding: [0xc3]
   2019 ;
   2020 ; X64-LABEL: test_mask_mullo_epi64_rmb_512:
   2021 ; X64:       # %bb.0:
   2022 ; X64-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07]
   2023 ; X64-NEXT:    retq # encoding: [0xc3]
   2024   %q = load i64, i64* %ptr_b
   2025   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   2026   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   2027   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   2028   ret <8 x i64> %res
   2029 }
   2030 
   2031 define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   2032 ; X86-LABEL: test_mask_mullo_epi64_rmbk_512:
   2033 ; X86:       # %bb.0:
   2034 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2035 ; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
   2036 ; X86-NEXT:    # xmm2 = mem[0],zero
   2037 ; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
   2038 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2039 ; X86-NEXT:    vpmullq %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xca]
   2040 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2041 ; X86-NEXT:    retl # encoding: [0xc3]
   2042 ;
   2043 ; X64-LABEL: test_mask_mullo_epi64_rmbk_512:
   2044 ; X64:       # %bb.0:
   2045 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2046 ; X64-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
   2047 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2048 ; X64-NEXT:    retq # encoding: [0xc3]
   2049   %q = load i64, i64* %ptr_b
   2050   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   2051   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   2052   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   2053   ret <8 x i64> %res
   2054 }
   2055 
   2056 define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
   2057 ; X86-LABEL: test_mask_mullo_epi64_rmbkz_512:
   2058 ; X86:       # %bb.0:
   2059 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2060 ; X86-NEXT:    vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   2061 ; X86-NEXT:    # xmm1 = mem[0],zero
   2062 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   2063 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2064 ; X86-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
   2065 ; X86-NEXT:    retl # encoding: [0xc3]
   2066 ;
   2067 ; X64-LABEL: test_mask_mullo_epi64_rmbkz_512:
   2068 ; X64:       # %bb.0:
   2069 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2070 ; X64-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07]
   2071 ; X64-NEXT:    retq # encoding: [0xc3]
   2072   %q = load i64, i64* %ptr_b
   2073   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   2074   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   2075   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
   2076   ret <8 x i64> %res
   2077 }
   2078 declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   2079 
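; VPMULLQ tests at 256 bits (AVX512DQ+VL): the same operand and masking
; combinations as the 512-bit group above, on ymm registers.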
   2080 define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
   2081 ; CHECK-LABEL: test_mask_mullo_epi64_rr_256:
   2082 ; CHECK:       # %bb.0:
   2083 ; CHECK-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
   2084 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   2085   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
   2086   ret <4 x i64> %res
   2087 }
   2088 
   2089 define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
   2090 ; X86-LABEL: test_mask_mullo_epi64_rrk_256:
   2091 ; X86:       # %bb.0:
   2092 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2093 ; X86-NEXT:    vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
   2094 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   2095 ; X86-NEXT:    retl # encoding: [0xc3]
   2096 ;
   2097 ; X64-LABEL: test_mask_mullo_epi64_rrk_256:
   2098 ; X64:       # %bb.0:
   2099 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2100 ; X64-NEXT:    vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
   2101 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   2102 ; X64-NEXT:    retq # encoding: [0xc3]
   2103   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
   2104   ret <4 x i64> %res
   2105 }
   2106 
   2107 define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
   2108 ; X86-LABEL: test_mask_mullo_epi64_rrkz_256:
   2109 ; X86:       # %bb.0:
   2110 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2111 ; X86-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
   2112 ; X86-NEXT:    retl # encoding: [0xc3]
   2113 ;
   2114 ; X64-LABEL: test_mask_mullo_epi64_rrkz_256:
   2115 ; X64:       # %bb.0:
   2116 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2117 ; X64-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
   2118 ; X64-NEXT:    retq # encoding: [0xc3]
   2119   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
   2120   ret <4 x i64> %res
   2121 }
   2122 
   2123 define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
   2124 ; X86-LABEL: test_mask_mullo_epi64_rm_256:
   2125 ; X86:       # %bb.0:
   2126 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2127 ; X86-NEXT:    vpmullq (%eax), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x00]
   2128 ; X86-NEXT:    retl # encoding: [0xc3]
   2129 ;
   2130 ; X64-LABEL: test_mask_mullo_epi64_rm_256:
   2131 ; X64:       # %bb.0:
   2132 ; X64-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07]
   2133 ; X64-NEXT:    retq # encoding: [0xc3]
   2134   %b = load <4 x i64>, <4 x i64>* %ptr_b
   2135   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
   2136   ret <4 x i64> %res
   2137 }
   2138 
   2139 define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
   2140 ; X86-LABEL: test_mask_mullo_epi64_rmk_256:
   2141 ; X86:       # %bb.0:
   2142 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2143 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2144 ; X86-NEXT:    vpmullq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x08]
   2145 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   2146 ; X86-NEXT:    retl # encoding: [0xc3]
   2147 ;
   2148 ; X64-LABEL: test_mask_mullo_epi64_rmk_256:
   2149 ; X64:       # %bb.0:
   2150 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2151 ; X64-NEXT:    vpmullq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f]
   2152 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   2153 ; X64-NEXT:    retq # encoding: [0xc3]
   2154   %b = load <4 x i64>, <4 x i64>* %ptr_b
   2155   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
   2156   ret <4 x i64> %res
   2157 }
   2158 
   2159 define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
   2160 ; X86-LABEL: test_mask_mullo_epi64_rmkz_256:
   2161 ; X86:       # %bb.0:
   2162 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2163 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2164 ; X86-NEXT:    vpmullq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x00]
   2165 ; X86-NEXT:    retl # encoding: [0xc3]
   2166 ;
   2167 ; X64-LABEL: test_mask_mullo_epi64_rmkz_256:
   2168 ; X64:       # %bb.0:
   2169 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2170 ; X64-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07]
   2171 ; X64-NEXT:    retq # encoding: [0xc3]
   2172   %b = load <4 x i64>, <4 x i64>* %ptr_b
   2173   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
   2174   ret <4 x i64> %res
   2175 }
   2176 
   2177 define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
   2178 ; X86-LABEL: test_mask_mullo_epi64_rmb_256:
   2179 ; X86:       # %bb.0:
   2180 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2181 ; X86-NEXT:    vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   2182 ; X86-NEXT:    # xmm1 = mem[0],zero
   2183 ; X86-NEXT:    vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
   2184 ; X86-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
   2185 ; X86-NEXT:    retl # encoding: [0xc3]
   2186 ;
   2187 ; X64-LABEL: test_mask_mullo_epi64_rmb_256:
   2188 ; X64:       # %bb.0:
   2189 ; X64-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
   2190 ; X64-NEXT:    retq # encoding: [0xc3]
   2191   %q = load i64, i64* %ptr_b
   2192   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
   2193   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
   2194   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
   2195   ret <4 x i64> %res
   2196 }
   2197 
   2198 define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
   2199 ; X86-LABEL: test_mask_mullo_epi64_rmbk_256:
   2200 ; X86:       # %bb.0:
   2201 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2202 ; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
   2203 ; X86-NEXT:    # xmm2 = mem[0],zero
   2204 ; X86-NEXT:    vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
   2205 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2206 ; X86-NEXT:    vpmullq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xca]
   2207 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   2208 ; X86-NEXT:    retl # encoding: [0xc3]
   2209 ;
   2210 ; X64-LABEL: test_mask_mullo_epi64_rmbk_256:
   2211 ; X64:       # %bb.0:
   2212 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2213 ; X64-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
   2214 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   2215 ; X64-NEXT:    retq # encoding: [0xc3]
   2216   %q = load i64, i64* %ptr_b
   2217   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
   2218   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
   2219   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
   2220   ret <4 x i64> %res
   2221 }
   2222 
   2223 define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
   2224 ; X86-LABEL: test_mask_mullo_epi64_rmbkz_256:
   2225 ; X86:       # %bb.0:
   2226 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2227 ; X86-NEXT:    vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   2228 ; X86-NEXT:    # xmm1 = mem[0],zero
   2229 ; X86-NEXT:    vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
   2230 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2231 ; X86-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
   2232 ; X86-NEXT:    retl # encoding: [0xc3]
   2233 ;
   2234 ; X64-LABEL: test_mask_mullo_epi64_rmbkz_256:
   2235 ; X64:       # %bb.0:
   2236 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2237 ; X64-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
   2238 ; X64-NEXT:    retq # encoding: [0xc3]
   2239   %q = load i64, i64* %ptr_b
   2240   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
   2241   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
   2242   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
   2243   ret <4 x i64> %res
   2244 }
   2245 
   2246 declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
   2247 
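; VPMULLQ tests at 128 bits (AVX512DQ+VL), on xmm registers.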
   2248 define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
   2249 ; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
   2250 ; CHECK:       # %bb.0:
   2251 ; CHECK-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
   2252 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   2253   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
   2254   ret <2 x i64> %res
   2255 }
   2256 
   2257 define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
   2258 ; X86-LABEL: test_mask_mullo_epi64_rrk_128:
   2259 ; X86:       # %bb.0:
   2260 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2261 ; X86-NEXT:    vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
   2262 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   2263 ; X86-NEXT:    retl # encoding: [0xc3]
   2264 ;
   2265 ; X64-LABEL: test_mask_mullo_epi64_rrk_128:
   2266 ; X64:       # %bb.0:
   2267 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2268 ; X64-NEXT:    vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
   2269 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   2270 ; X64-NEXT:    retq # encoding: [0xc3]
   2271   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
   2272   ret <2 x i64> %res
   2273 }
   2274 
   2275 define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
   2276 ; X86-LABEL: test_mask_mullo_epi64_rrkz_128:
   2277 ; X86:       # %bb.0:
   2278 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2279 ; X86-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
   2280 ; X86-NEXT:    retl # encoding: [0xc3]
   2281 ;
   2282 ; X64-LABEL: test_mask_mullo_epi64_rrkz_128:
   2283 ; X64:       # %bb.0:
   2284 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2285 ; X64-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
   2286 ; X64-NEXT:    retq # encoding: [0xc3]
   2287   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
   2288   ret <2 x i64> %res
   2289 }
   2290 
   2291 define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
   2292 ; X86-LABEL: test_mask_mullo_epi64_rm_128:
   2293 ; X86:       # %bb.0:
   2294 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2295 ; X86-NEXT:    vpmullq (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x00]
   2296 ; X86-NEXT:    retl # encoding: [0xc3]
   2297 ;
   2298 ; X64-LABEL: test_mask_mullo_epi64_rm_128:
   2299 ; X64:       # %bb.0:
   2300 ; X64-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
   2301 ; X64-NEXT:    retq # encoding: [0xc3]
   2302   %b = load <2 x i64>, <2 x i64>* %ptr_b
   2303   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
   2304   ret <2 x i64> %res
   2305 }
   2306 
   2307 define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
   2308 ; X86-LABEL: test_mask_mullo_epi64_rmk_128:
   2309 ; X86:       # %bb.0:
   2310 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2311 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2312 ; X86-NEXT:    vpmullq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x08]
   2313 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   2314 ; X86-NEXT:    retl # encoding: [0xc3]
   2315 ;
   2316 ; X64-LABEL: test_mask_mullo_epi64_rmk_128:
   2317 ; X64:       # %bb.0:
   2318 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2319 ; X64-NEXT:    vpmullq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
   2320 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   2321 ; X64-NEXT:    retq # encoding: [0xc3]
   2322   %b = load <2 x i64>, <2 x i64>* %ptr_b
   2323   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
   2324   ret <2 x i64> %res
   2325 }
   2326 
   2327 define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
   2328 ; X86-LABEL: test_mask_mullo_epi64_rmkz_128:
   2329 ; X86:       # %bb.0:
   2330 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2331 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2332 ; X86-NEXT:    vpmullq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x00]
   2333 ; X86-NEXT:    retl # encoding: [0xc3]
   2334 ;
   2335 ; X64-LABEL: test_mask_mullo_epi64_rmkz_128:
   2336 ; X64:       # %bb.0:
   2337 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2338 ; X64-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
   2339 ; X64-NEXT:    retq # encoding: [0xc3]
   2340   %b = load <2 x i64>, <2 x i64>* %ptr_b
   2341   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
   2342   ret <2 x i64> %res
   2343 }
   2344 
   2345 define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
   2346 ; X86-LABEL: test_mask_mullo_epi64_rmb_128:
   2347 ; X86:       # %bb.0:
   2348 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2349 ; X86-NEXT:    vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
   2350 ; X86-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
   2351 ; X86-NEXT:    retl # encoding: [0xc3]
   2352 ;
   2353 ; X64-LABEL: test_mask_mullo_epi64_rmb_128:
   2354 ; X64:       # %bb.0:
   2355 ; X64-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
   2356 ; X64-NEXT:    retq # encoding: [0xc3]
   2357   %q = load i64, i64* %ptr_b
   2358   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
   2359   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
   2360   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
   2361   ret <2 x i64> %res
   2362 }
   2363 
   2364 define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
   2365 ; X86-LABEL: test_mask_mullo_epi64_rmbk_128:
   2366 ; X86:       # %bb.0:
   2367 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2368 ; X86-NEXT:    vpbroadcastq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x10]
   2369 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2370 ; X86-NEXT:    vpmullq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xca]
   2371 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   2372 ; X86-NEXT:    retl # encoding: [0xc3]
   2373 ;
   2374 ; X64-LABEL: test_mask_mullo_epi64_rmbk_128:
   2375 ; X64:       # %bb.0:
   2376 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2377 ; X64-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
   2378 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   2379 ; X64-NEXT:    retq # encoding: [0xc3]
   2380   %q = load i64, i64* %ptr_b
   2381   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
   2382   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
   2383   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
   2384   ret <2 x i64> %res
   2385 }
   2386 
   2387 define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
   2388 ; X86-LABEL: test_mask_mullo_epi64_rmbkz_128:
   2389 ; X86:       # %bb.0:
   2390 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2391 ; X86-NEXT:    vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
   2392 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2393 ; X86-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
   2394 ; X86-NEXT:    retl # encoding: [0xc3]
   2395 ;
   2396 ; X64-LABEL: test_mask_mullo_epi64_rmbkz_128:
   2397 ; X64:       # %bb.0:
   2398 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2399 ; X64-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
   2400 ; X64-NEXT:    retq # encoding: [0xc3]
   2401   %q = load i64, i64* %ptr_b
   2402   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
   2403   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
   2404   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
   2405   ret <2 x i64> %res
   2406 }
   2407 
   2408 declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   2409 
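; 128-bit subvector extract/insert intrinsic tests (vextractf64x2, vinsertf64x2,
; vinserti64x2) on 256-bit vectors, with merge and zero masking.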
   2410 declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)
   2411 
   2412 define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
   2413 ; X86-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
   2414 ; X86:       # %bb.0:
   2415 ; X86-NEXT:    vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
   2416 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2417 ; X86-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
   2418 ; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
   2419 ; X86-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
   2420 ; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
   2421 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2422 ; X86-NEXT:    retl # encoding: [0xc3]
   2423 ;
   2424 ; X64-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
   2425 ; X64:       # %bb.0:
   2426 ; X64-NEXT:    vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
   2427 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2428 ; X64-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
   2429 ; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
   2430 ; X64-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
   2431 ; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
   2432 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2433 ; X64-NEXT:    retq # encoding: [0xc3]
   2434   %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
   2435   %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
   2436   %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
   2437   %res3 = fadd <2 x double> %res, %res1
   2438   %res4 = fadd <2 x double> %res3, %res2
   2439   ret <2 x double> %res4
   2440 }
   2441 
   2442 declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)
   2443 
   2444 define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
   2445 ; X86-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
   2446 ; X86:       # %bb.0:
   2447 ; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
   2448 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2449 ; X86-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
   2450 ; X86-NEXT:    vaddpd %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xd3]
   2451 ; X86-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
   2452 ; X86-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
   2453 ; X86-NEXT:    retl # encoding: [0xc3]
   2454 ;
   2455 ; X64-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
   2456 ; X64:       # %bb.0:
   2457 ; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
   2458 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2459 ; X64-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
   2460 ; X64-NEXT:    vaddpd %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xd3]
   2461 ; X64-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
   2462 ; X64-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
   2463 ; X64-NEXT:    retq # encoding: [0xc3]
   2464   %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
   2465   %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
   2466   %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
   2467   %res3 = fadd <4 x double> %res, %res1
   2468   %res4 = fadd <4 x double> %res2, %res3
   2469   ret <4 x double> %res4
   2470 }
   2471 
   2472 declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)
   2473 
   2474 define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
   2475 ; X86-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
   2476 ; X86:       # %bb.0:
   2477 ; X86-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
   2478 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2479 ; X86-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
   2480 ; X86-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
   2481 ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
   2482 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   2483 ; X86-NEXT:    retl # encoding: [0xc3]
   2484 ;
   2485 ; X64-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
   2486 ; X64:       # %bb.0:
   2487 ; X64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
   2488 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2489 ; X64-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
   2490 ; X64-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
   2491 ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
   2492 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   2493 ; X64-NEXT:    retq # encoding: [0xc3]
   2494   %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
   2495   %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
   2496   %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
   2497   %res3 = add <4 x i64> %res, %res1
   2498   %res4 = add <4 x i64> %res3, %res2
   2499   ret <4 x i64> %res4
   2500 }
   2501 
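; Mask-to-vector conversion tests: vpmovm2d / vpmovm2q expand each mask bit into
; an all-ones or all-zeros vector element.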
   2502 declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8)
   2503 
   2504 define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
   2505 ; X86-LABEL: test_int_x86_avx512_cvtmask2d_128:
   2506 ; X86:       # %bb.0:
   2507 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
   2508 ; X86-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
   2509 ; X86-NEXT:    retl # encoding: [0xc3]
   2510 ;
   2511 ; X64-LABEL: test_int_x86_avx512_cvtmask2d_128:
   2512 ; X64:       # %bb.0:
   2513 ; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
   2514 ; X64-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
   2515 ; X64-NEXT:    retq # encoding: [0xc3]
   2516   %res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
   2517   ret <4 x i32> %res
   2518 }
   2519 
   2520 declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8)
   2521 
   2522 define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
   2523 ; X86-LABEL: test_int_x86_avx512_cvtmask2d_256:
   2524 ; X86:       # %bb.0:
   2525 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
   2526 ; X86-NEXT:    vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
   2527 ; X86-NEXT:    retl # encoding: [0xc3]
   2528 ;
   2529 ; X64-LABEL: test_int_x86_avx512_cvtmask2d_256:
   2530 ; X64:       # %bb.0:
   2531 ; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
   2532 ; X64-NEXT:    vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
   2533 ; X64-NEXT:    retq # encoding: [0xc3]
   2534   %res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
   2535   ret <8 x i32> %res
   2536 }
   2537 
   2538 declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8)
   2539 
   2540 define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
   2541 ; X86-LABEL: test_int_x86_avx512_cvtmask2q_128:
   2542 ; X86:       # %bb.0:
   2543 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
   2544 ; X86-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
   2545 ; X86-NEXT:    retl # encoding: [0xc3]
   2546 ;
   2547 ; X64-LABEL: test_int_x86_avx512_cvtmask2q_128:
   2548 ; X64:       # %bb.0:
   2549 ; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
   2550 ; X64-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
   2551 ; X64-NEXT:    retq # encoding: [0xc3]
   2552   %res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
   2553   ret <2 x i64> %res
   2554 }
   2555 
   2556 declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8)
   2557 
   2558 define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
   2559 ; X86-LABEL: test_int_x86_avx512_cvtmask2q_256:
   2560 ; X86:       # %bb.0:
   2561 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
   2562 ; X86-NEXT:    vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
   2563 ; X86-NEXT:    retl # encoding: [0xc3]
   2564 ;
   2565 ; X64-LABEL: test_int_x86_avx512_cvtmask2q_256:
   2566 ; X64:       # %bb.0:
   2567 ; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
   2568 ; X64-NEXT:    vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
   2569 ; X64-NEXT:    retq # encoding: [0xc3]
   2570   %res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
   2571   ret <4 x i64> %res
   2572 }
   2573 
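; Subvector broadcast intrinsic tests (broadcastf64x2/broadcasti64x2 and the
; 32x2 variants), in register and load-from-memory forms, masked and zero-masked.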
   2574 declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x double>, i8)
   2575 
   2576 define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
   2577 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
   2578 ; X86:       # %bb.0:
   2579 ; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
   2580 ; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
   2581 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2582 ; X86-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
   2583 ; X86-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
   2584 ; X86-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
   2585 ; X86-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
   2586 ; X86-NEXT:    retl # encoding: [0xc3]
   2587 ;
   2588 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
   2589 ; X64:       # %bb.0:
   2590 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
   2591 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
   2592 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2593 ; X64-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
   2594 ; X64-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
   2595 ; X64-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
   2596 ; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
   2597 ; X64-NEXT:    retq # encoding: [0xc3]
   2598 
   2599   %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1)
   2600   %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
   2601   %res3 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> zeroinitializer, i8 %mask)
   2602   %res4 = fadd <4 x double> %res1, %res2
   2603   %res5 = fadd <4 x double> %res3, %res4
   2604   ret <4 x double> %res5
   2605 }
   2606 
   2607 define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256_load(<2 x double>* %x0ptr, <4 x double> %x2, i8 %mask) {
   2608 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
   2609 ; X86:       # %bb.0:
   2610 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2611 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2612 ; X86-NEXT:    vbroadcastf64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x00]
   2613 ; X86-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
   2614 ; X86-NEXT:    retl # encoding: [0xc3]
   2615 ;
   2616 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
   2617 ; X64:       # %bb.0:
   2618 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2619 ; X64-NEXT:    vbroadcastf64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x07]
   2620 ; X64-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
   2621 ; X64-NEXT:    retq # encoding: [0xc3]
   2622 
   2623   %x0 = load <2 x double>, <2 x double>* %x0ptr
   2624   %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
   2625   ret <4 x double> %res
   2626 }
   2627 
   2628 declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>, i8)
   2629 
   2630 define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
   2631 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
   2632 ; X86:       # %bb.0:
   2633 ; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
   2634 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
   2635 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2636 ; X86-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
   2637 ; X86-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
   2638 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   2639 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   2640 ; X86-NEXT:    retl # encoding: [0xc3]
   2641 ;
   2642 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
   2643 ; X64:       # %bb.0:
   2644 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
   2645 ; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
   2646 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2647 ; X64-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
   2648 ; X64-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
   2649 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   2650 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   2651 ; X64-NEXT:    retq # encoding: [0xc3]
   2652 
   2653   %res1 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1)
   2654   %res2 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
   2655   %res3 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
   2656   %res4 = add <4 x i64> %res1, %res2
   2657   %res5 = add <4 x i64> %res3, %res4
   2658   ret <4 x i64> %res5
   2659 }
   2660 
   2661 define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256_load(<2 x i64>* %x0ptr, <4 x i64> %x2, i8 %mask) {
   2662 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
   2663 ; X86:       # %bb.0:
   2664 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2665 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
   2666 ; X86-NEXT:    vbroadcasti64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x00]
   2667 ; X86-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
   2668 ; X86-NEXT:    retl # encoding: [0xc3]
   2669 ;
   2670 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
   2671 ; X64:       # %bb.0:
   2672 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   2673 ; X64-NEXT:    vbroadcasti64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x07]
   2674 ; X64-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
   2675 ; X64-NEXT:    retq # encoding: [0xc3]
   2676 
   2677   %x0 = load <2 x i64>, <2 x i64>* %x0ptr
   2678   %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
   2679   ret <4 x i64> %res
   2680 }
   2681 
   2682 declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>, <8 x float>, i8)
   2683 
   2684 define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) {
   2685 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
   2686 ; X86:       # %bb.0:
   2687 ; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
   2688 ; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
   2689 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
   2690 ; X86-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
   2691 ; X86-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
   2692 ; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
   2693 ; X86-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
   2694 ; X86-NEXT:    retl # encoding: [0xc3]
   2695 ;
   2696 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
   2697 ; X64:       # %bb.0:
   2698 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
   2699 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
   2700 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   2701 ; X64-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
   2702 ; X64-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
   2703 ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
   2704 ; X64-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
   2705 ; X64-NEXT:    retq # encoding: [0xc3]
   2706   %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>  %x0, <8 x float> %x2, i8 %x3)
   2707   %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3)
   2708   %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
   2709   %res3 = fadd <8 x float> %res, %res1
   2710   %res4 = fadd <8 x float> %res3, %res2
   2711   ret <8 x float> %res4
   2712 }
   2713 
   2714 declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8)
   2715 
   2716 define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3, i64 * %y_ptr) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti32x4 $1, %xmm2, %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf3,0x6d,0x29,0x38,0xca,0x01]
; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
; X86-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vmovq (%rsi), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x16]
; X64-NEXT:    # xmm2 = mem[0],zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x4 $1, %xmm2, %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf3,0x6d,0x29,0x38,0xca,0x01]
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
; X64-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %y_64 = load i64, i64* %y_ptr
  %y_v2i64 = insertelement <2 x i64> undef, i64 %y_64, i32 0
  %y = bitcast <2 x i64> %y_v2i64 to <4 x i32>
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %y, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X86-NEXT:    vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X64-NEXT:    vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)

define i8 @test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovd2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)

define i8 @test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovd2m %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)

define i8 @test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovq2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)

define i8 @test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovq2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
  ret i8 %res
}

declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vcvtqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xd0]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vcvtqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xd0]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
; X86-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
; X64-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vcvtuqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xd0]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vcvtuqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtuqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xd0]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
; X86-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtuqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
; X64-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res)
  ret i8 %res1
}

declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res)
  ret i8 %res1
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res)
  ret i8 %res1
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res)
  ret i8 %res1
}