; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rm_128:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x08]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_andnot_ps_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x08]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vandnps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_ps_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_ps_rrkz_256:
; X86: #
%bb.0: 194 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 195 ; X86-NEXT: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1] 196 ; X86-NEXT: retl # encoding: [0xc3] 197 ; 198 ; X64-LABEL: test_mask_andnot_ps_rrkz_256: 199 ; X64: # %bb.0: 200 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 201 ; X64-NEXT: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1] 202 ; X64-NEXT: retq # encoding: [0xc3] 203 %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 204 ret <8 x float> %res 205 } 206 207 define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { 208 ; X86-LABEL: test_mask_andnot_ps_rm_256: 209 ; X86: # %bb.0: 210 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 211 ; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x00] 212 ; X86-NEXT: retl # encoding: [0xc3] 213 ; 214 ; X64-LABEL: test_mask_andnot_ps_rm_256: 215 ; X64: # %bb.0: 216 ; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x07] 217 ; X64-NEXT: retq # encoding: [0xc3] 218 %b = load <8 x float>, <8 x float>* %ptr_b 219 %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 220 ret <8 x float> %res 221 } 222 223 define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { 224 ; X86-LABEL: test_mask_andnot_ps_rmk_256: 225 ; X86: # %bb.0: 226 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 227 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 228 ; X86-NEXT: vandnps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x08] 229 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 230 ; X86-NEXT: retl # encoding: [0xc3] 231 ; 232 ; X64-LABEL: test_mask_andnot_ps_rmk_256: 233 ; X64: # %bb.0: 234 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 235 ; X64-NEXT: vandnps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f] 236 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 237 ; X64-NEXT: retq # encoding: [0xc3] 238 %b = load <8 x float>, <8 x float>* %ptr_b 239 %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 240 ret <8 x float> %res 241 } 242 243 define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { 244 ; X86-LABEL: test_mask_andnot_ps_rmkz_256: 245 ; X86: # %bb.0: 246 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 247 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 248 ; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x00] 249 ; X86-NEXT: retl # encoding: [0xc3] 250 ; 251 ; X64-LABEL: test_mask_andnot_ps_rmkz_256: 252 ; X64: # %bb.0: 253 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 254 ; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07] 255 ; X64-NEXT: retq # encoding: [0xc3] 256 %b = load <8 x float>, <8 x float>* %ptr_b 257 %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> 
zeroinitializer, i8 %mask) 258 ret <8 x float> %res 259 } 260 261 define <8 x float> @test_mask_andnot_ps_rmb_256(<8 x float> %a, float* %ptr_b) { 262 ; X86-LABEL: test_mask_andnot_ps_rmb_256: 263 ; X86: # %bb.0: 264 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 265 ; X86-NEXT: vandnps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x00] 266 ; X86-NEXT: retl # encoding: [0xc3] 267 ; 268 ; X64-LABEL: test_mask_andnot_ps_rmb_256: 269 ; X64: # %bb.0: 270 ; X64-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x07] 271 ; X64-NEXT: retq # encoding: [0xc3] 272 %q = load float, float* %ptr_b 273 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 274 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 275 %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 276 ret <8 x float> %res 277 } 278 279 define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { 280 ; X86-LABEL: test_mask_andnot_ps_rmbk_256: 281 ; X86: # %bb.0: 282 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 283 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 284 ; X86-NEXT: vandnps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x08] 285 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 286 ; X86-NEXT: retl # encoding: [0xc3] 287 ; 288 ; X64-LABEL: test_mask_andnot_ps_rmbk_256: 289 ; X64: # %bb.0: 290 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 291 ; X64-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f] 292 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 293 ; X64-NEXT: retq # encoding: [0xc3] 294 %q = load float, float* %ptr_b 295 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 296 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 297 %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 298 ret <8 x float> %res 299 } 300 301 define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { 302 ; X86-LABEL: test_mask_andnot_ps_rmbkz_256: 303 ; X86: # %bb.0: 304 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 305 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 306 ; X86-NEXT: vandnps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x00] 307 ; X86-NEXT: retl # encoding: [0xc3] 308 ; 309 ; X64-LABEL: test_mask_andnot_ps_rmbkz_256: 310 ; X64: # %bb.0: 311 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 312 ; X64-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07] 313 ; X64-NEXT: retq # encoding: [0xc3] 314 %q = load float, float* %ptr_b 315 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 316 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 317 %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 318 ret <8 x float> %res 319 } 320 321 declare <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 322 323 define <16 x 
float> @test_mask_andnot_ps_rr_512(<16 x float> %a, <16 x float> %b) { 324 ; CHECK-LABEL: test_mask_andnot_ps_rr_512: 325 ; CHECK: # %bb.0: 326 ; CHECK-NEXT: vandnps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0xc1] 327 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 328 %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 329 ret <16 x float> %res 330 } 331 332 define <16 x float> @test_mask_andnot_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) { 333 ; X86-LABEL: test_mask_andnot_ps_rrk_512: 334 ; X86: # %bb.0: 335 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 336 ; X86-NEXT: vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1] 337 ; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 338 ; X86-NEXT: retl # encoding: [0xc3] 339 ; 340 ; X64-LABEL: test_mask_andnot_ps_rrk_512: 341 ; X64: # %bb.0: 342 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 343 ; X64-NEXT: vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1] 344 ; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 345 ; X64-NEXT: retq # encoding: [0xc3] 346 %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 347 ret <16 x float> %res 348 } 349 350 define <16 x float> @test_mask_andnot_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) { 351 ; X86-LABEL: test_mask_andnot_ps_rrkz_512: 352 ; X86: # %bb.0: 353 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 354 ; X86-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1] 355 ; X86-NEXT: retl # encoding: [0xc3] 356 ; 357 ; X64-LABEL: test_mask_andnot_ps_rrkz_512: 358 ; X64: # %bb.0: 359 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 360 ; X64-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1] 361 ; X64-NEXT: retq # encoding: [0xc3] 362 %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 363 ret <16 x float> %res 364 } 365 366 define <16 x float> @test_mask_andnot_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) { 367 ; X86-LABEL: test_mask_andnot_ps_rm_512: 368 ; X86: # %bb.0: 369 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 370 ; X86-NEXT: vandnps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x00] 371 ; X86-NEXT: retl # encoding: [0xc3] 372 ; 373 ; X64-LABEL: test_mask_andnot_ps_rm_512: 374 ; X64: # %bb.0: 375 ; X64-NEXT: vandnps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x07] 376 ; X64-NEXT: retq # encoding: [0xc3] 377 %b = load <16 x float>, <16 x float>* %ptr_b 378 %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 379 ret <16 x float> %res 380 } 381 382 define <16 x float> @test_mask_andnot_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) { 383 ; X86-LABEL: test_mask_andnot_ps_rmk_512: 384 ; X86: # %bb.0: 385 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 386 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 387 ; X86-NEXT: vandnps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x08] 388 
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 389 ; X86-NEXT: retl # encoding: [0xc3] 390 ; 391 ; X64-LABEL: test_mask_andnot_ps_rmk_512: 392 ; X64: # %bb.0: 393 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 394 ; X64-NEXT: vandnps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x0f] 395 ; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 396 ; X64-NEXT: retq # encoding: [0xc3] 397 %b = load <16 x float>, <16 x float>* %ptr_b 398 %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 399 ret <16 x float> %res 400 } 401 402 define <16 x float> @test_mask_andnot_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) { 403 ; X86-LABEL: test_mask_andnot_ps_rmkz_512: 404 ; X86: # %bb.0: 405 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 406 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 407 ; X86-NEXT: vandnps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x00] 408 ; X86-NEXT: retl # encoding: [0xc3] 409 ; 410 ; X64-LABEL: test_mask_andnot_ps_rmkz_512: 411 ; X64: # %bb.0: 412 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 413 ; X64-NEXT: vandnps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x07] 414 ; X64-NEXT: retq # encoding: [0xc3] 415 %b = load <16 x float>, <16 x float>* %ptr_b 416 %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 417 ret <16 x float> %res 418 } 419 420 define <16 x float> @test_mask_andnot_ps_rmb_512(<16 x float> %a, float* %ptr_b) { 421 ; X86-LABEL: test_mask_andnot_ps_rmb_512: 422 ; X86: # %bb.0: 423 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 424 ; X86-NEXT: vandnps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x00] 425 ; X86-NEXT: retl # encoding: [0xc3] 426 ; 427 ; X64-LABEL: test_mask_andnot_ps_rmb_512: 428 ; X64: # %bb.0: 429 ; X64-NEXT: vandnps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x07] 430 ; X64-NEXT: retq # encoding: [0xc3] 431 %q = load float, float* %ptr_b 432 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 433 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 434 %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 435 ret <16 x float> %res 436 } 437 438 define <16 x float> @test_mask_andnot_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) { 439 ; X86-LABEL: test_mask_andnot_ps_rmbk_512: 440 ; X86: # %bb.0: 441 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 442 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 443 ; X86-NEXT: vandnps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x08] 444 ; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 445 ; X86-NEXT: retl # encoding: [0xc3] 446 ; 447 ; X64-LABEL: test_mask_andnot_ps_rmbk_512: 448 ; X64: # %bb.0: 449 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 450 ; X64-NEXT: vandnps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x0f] 451 ; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 452 ; X64-NEXT: retq # encoding: [0xc3] 453 
%q = load float, float* %ptr_b 454 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 455 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 456 %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 457 ret <16 x float> %res 458 } 459 460 define <16 x float> @test_mask_andnot_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) { 461 ; X86-LABEL: test_mask_andnot_ps_rmbkz_512: 462 ; X86: # %bb.0: 463 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 464 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 465 ; X86-NEXT: vandnps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x00] 466 ; X86-NEXT: retl # encoding: [0xc3] 467 ; 468 ; X64-LABEL: test_mask_andnot_ps_rmbkz_512: 469 ; X64: # %bb.0: 470 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 471 ; X64-NEXT: vandnps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x07] 472 ; X64-NEXT: retq # encoding: [0xc3] 473 %q = load float, float* %ptr_b 474 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 475 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 476 %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 477 ret <16 x float> %res 478 } 479 480 declare <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 481 482 define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) { 483 ; CHECK-LABEL: test_mask_and_ps_rr_128: 484 ; CHECK: # %bb.0: 485 ; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1] 486 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 487 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 488 ret <4 x float> %res 489 } 490 491 define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) { 492 ; X86-LABEL: test_mask_and_ps_rrk_128: 493 ; X86: # %bb.0: 494 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 495 ; X86-NEXT: vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1] 496 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 497 ; X86-NEXT: retl # encoding: [0xc3] 498 ; 499 ; X64-LABEL: test_mask_and_ps_rrk_128: 500 ; X64: # %bb.0: 501 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 502 ; X64-NEXT: vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1] 503 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 504 ; X64-NEXT: retq # encoding: [0xc3] 505 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 506 ret <4 x float> %res 507 } 508 509 define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { 510 ; X86-LABEL: test_mask_and_ps_rrkz_128: 511 ; X86: # %bb.0: 512 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 513 ; X86-NEXT: vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1] 514 ; X86-NEXT: retl # encoding: [0xc3] 515 ; 516 ; X64-LABEL: test_mask_and_ps_rrkz_128: 517 ; X64: # %bb.0: 518 ; 
X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 519 ; X64-NEXT: vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1] 520 ; X64-NEXT: retq # encoding: [0xc3] 521 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 522 ret <4 x float> %res 523 } 524 525 define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { 526 ; X86-LABEL: test_mask_and_ps_rm_128: 527 ; X86: # %bb.0: 528 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 529 ; X86-NEXT: vandps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x00] 530 ; X86-NEXT: retl # encoding: [0xc3] 531 ; 532 ; X64-LABEL: test_mask_and_ps_rm_128: 533 ; X64: # %bb.0: 534 ; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x07] 535 ; X64-NEXT: retq # encoding: [0xc3] 536 %b = load <4 x float>, <4 x float>* %ptr_b 537 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 538 ret <4 x float> %res 539 } 540 541 define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { 542 ; X86-LABEL: test_mask_and_ps_rmk_128: 543 ; X86: # %bb.0: 544 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 545 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 546 ; X86-NEXT: vandps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x08] 547 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 548 ; X86-NEXT: retl # encoding: [0xc3] 549 ; 550 ; X64-LABEL: test_mask_and_ps_rmk_128: 551 ; X64: # %bb.0: 552 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 553 ; X64-NEXT: vandps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f] 554 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 555 ; X64-NEXT: retq # encoding: [0xc3] 556 %b = load <4 x float>, <4 x float>* %ptr_b 557 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 558 ret <4 x float> %res 559 } 560 561 define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { 562 ; X86-LABEL: test_mask_and_ps_rmkz_128: 563 ; X86: # %bb.0: 564 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 565 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 566 ; X86-NEXT: vandps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x00] 567 ; X86-NEXT: retl # encoding: [0xc3] 568 ; 569 ; X64-LABEL: test_mask_and_ps_rmkz_128: 570 ; X64: # %bb.0: 571 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 572 ; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07] 573 ; X64-NEXT: retq # encoding: [0xc3] 574 %b = load <4 x float>, <4 x float>* %ptr_b 575 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 576 ret <4 x float> %res 577 } 578 579 define <4 x float> @test_mask_and_ps_rmb_128(<4 x float> %a, float* %ptr_b) { 580 ; X86-LABEL: test_mask_and_ps_rmb_128: 581 ; X86: # %bb.0: 582 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 583 ; X86-NEXT: vandps (%eax){1to4}, %xmm0, %xmm0 # encoding: 
[0x62,0xf1,0x7c,0x18,0x54,0x00] 584 ; X86-NEXT: retl # encoding: [0xc3] 585 ; 586 ; X64-LABEL: test_mask_and_ps_rmb_128: 587 ; X64: # %bb.0: 588 ; X64-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x54,0x07] 589 ; X64-NEXT: retq # encoding: [0xc3] 590 %q = load float, float* %ptr_b 591 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 592 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 593 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 594 ret <4 x float> %res 595 } 596 597 define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { 598 ; X86-LABEL: test_mask_and_ps_rmbk_128: 599 ; X86: # %bb.0: 600 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 601 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 602 ; X86-NEXT: vandps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x08] 603 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 604 ; X86-NEXT: retl # encoding: [0xc3] 605 ; 606 ; X64-LABEL: test_mask_and_ps_rmbk_128: 607 ; X64: # %bb.0: 608 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 609 ; X64-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f] 610 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 611 ; X64-NEXT: retq # encoding: [0xc3] 612 %q = load float, float* %ptr_b 613 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 614 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 615 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 616 ret <4 x float> %res 617 } 618 619 define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { 620 ; X86-LABEL: test_mask_and_ps_rmbkz_128: 621 ; X86: # %bb.0: 622 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 623 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 624 ; X86-NEXT: vandps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x00] 625 ; X86-NEXT: retl # encoding: [0xc3] 626 ; 627 ; X64-LABEL: test_mask_and_ps_rmbkz_128: 628 ; X64: # %bb.0: 629 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 630 ; X64-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07] 631 ; X64-NEXT: retq # encoding: [0xc3] 632 %q = load float, float* %ptr_b 633 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 634 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 635 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 636 ret <4 x float> %res 637 } 638 639 declare <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 640 641 define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> %b) { 642 ; CHECK-LABEL: test_mask_and_ps_rr_256: 643 ; CHECK: # %bb.0: 644 ; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0xc1] 645 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 646 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x 
float> zeroinitializer, i8 -1) 647 ret <8 x float> %res 648 } 649 650 define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { 651 ; X86-LABEL: test_mask_and_ps_rrk_256: 652 ; X86: # %bb.0: 653 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 654 ; X86-NEXT: vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1] 655 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 656 ; X86-NEXT: retl # encoding: [0xc3] 657 ; 658 ; X64-LABEL: test_mask_and_ps_rrk_256: 659 ; X64: # %bb.0: 660 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 661 ; X64-NEXT: vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1] 662 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 663 ; X64-NEXT: retq # encoding: [0xc3] 664 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 665 ret <8 x float> %res 666 } 667 668 define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { 669 ; X86-LABEL: test_mask_and_ps_rrkz_256: 670 ; X86: # %bb.0: 671 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 672 ; X86-NEXT: vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1] 673 ; X86-NEXT: retl # encoding: [0xc3] 674 ; 675 ; X64-LABEL: test_mask_and_ps_rrkz_256: 676 ; X64: # %bb.0: 677 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 678 ; X64-NEXT: vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1] 679 ; X64-NEXT: retq # encoding: [0xc3] 680 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 681 ret <8 x float> %res 682 } 683 684 define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { 685 ; X86-LABEL: test_mask_and_ps_rm_256: 686 ; X86: # %bb.0: 687 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 688 ; X86-NEXT: vandps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x00] 689 ; X86-NEXT: retl # encoding: [0xc3] 690 ; 691 ; X64-LABEL: test_mask_and_ps_rm_256: 692 ; X64: # %bb.0: 693 ; X64-NEXT: vandps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x07] 694 ; X64-NEXT: retq # encoding: [0xc3] 695 %b = load <8 x float>, <8 x float>* %ptr_b 696 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 697 ret <8 x float> %res 698 } 699 700 define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { 701 ; X86-LABEL: test_mask_and_ps_rmk_256: 702 ; X86: # %bb.0: 703 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 704 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 705 ; X86-NEXT: vandps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x08] 706 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 707 ; X86-NEXT: retl # encoding: [0xc3] 708 ; 709 ; X64-LABEL: test_mask_and_ps_rmk_256: 710 ; X64: # %bb.0: 711 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 712 ; X64-NEXT: vandps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f] 713 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO 
VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 714 ; X64-NEXT: retq # encoding: [0xc3] 715 %b = load <8 x float>, <8 x float>* %ptr_b 716 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 717 ret <8 x float> %res 718 } 719 720 define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { 721 ; X86-LABEL: test_mask_and_ps_rmkz_256: 722 ; X86: # %bb.0: 723 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 724 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 725 ; X86-NEXT: vandps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x00] 726 ; X86-NEXT: retl # encoding: [0xc3] 727 ; 728 ; X64-LABEL: test_mask_and_ps_rmkz_256: 729 ; X64: # %bb.0: 730 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 731 ; X64-NEXT: vandps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07] 732 ; X64-NEXT: retq # encoding: [0xc3] 733 %b = load <8 x float>, <8 x float>* %ptr_b 734 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 735 ret <8 x float> %res 736 } 737 738 define <8 x float> @test_mask_and_ps_rmb_256(<8 x float> %a, float* %ptr_b) { 739 ; X86-LABEL: test_mask_and_ps_rmb_256: 740 ; X86: # %bb.0: 741 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 742 ; X86-NEXT: vandps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x00] 743 ; X86-NEXT: retl # encoding: [0xc3] 744 ; 745 ; X64-LABEL: test_mask_and_ps_rmb_256: 746 ; X64: # %bb.0: 747 ; X64-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x07] 748 ; X64-NEXT: retq # encoding: [0xc3] 749 %q = load float, float* %ptr_b 750 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 751 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 752 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 753 ret <8 x float> %res 754 } 755 756 define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { 757 ; X86-LABEL: test_mask_and_ps_rmbk_256: 758 ; X86: # %bb.0: 759 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 760 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 761 ; X86-NEXT: vandps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x08] 762 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 763 ; X86-NEXT: retl # encoding: [0xc3] 764 ; 765 ; X64-LABEL: test_mask_and_ps_rmbk_256: 766 ; X64: # %bb.0: 767 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 768 ; X64-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f] 769 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 770 ; X64-NEXT: retq # encoding: [0xc3] 771 %q = load float, float* %ptr_b 772 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 773 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 774 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 775 ret <8 x float> %res 776 } 777 778 define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* 
%ptr_b, i8 %mask) { 779 ; X86-LABEL: test_mask_and_ps_rmbkz_256: 780 ; X86: # %bb.0: 781 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 782 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 783 ; X86-NEXT: vandps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x00] 784 ; X86-NEXT: retl # encoding: [0xc3] 785 ; 786 ; X64-LABEL: test_mask_and_ps_rmbkz_256: 787 ; X64: # %bb.0: 788 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 789 ; X64-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07] 790 ; X64-NEXT: retq # encoding: [0xc3] 791 %q = load float, float* %ptr_b 792 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 793 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 794 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 795 ret <8 x float> %res 796 } 797 798 declare <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 799 800 define <16 x float> @test_mask_and_ps_rr_512(<16 x float> %a, <16 x float> %b) { 801 ; CHECK-LABEL: test_mask_and_ps_rr_512: 802 ; CHECK: # %bb.0: 803 ; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0xc1] 804 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 805 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 806 ret <16 x float> %res 807 } 808 809 define <16 x float> @test_mask_and_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) { 810 ; X86-LABEL: test_mask_and_ps_rrk_512: 811 ; X86: # %bb.0: 812 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 813 ; X86-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1] 814 ; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 815 ; X86-NEXT: retl # encoding: [0xc3] 816 ; 817 ; X64-LABEL: test_mask_and_ps_rrk_512: 818 ; X64: # %bb.0: 819 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 820 ; X64-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1] 821 ; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 822 ; X64-NEXT: retq # encoding: [0xc3] 823 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 824 ret <16 x float> %res 825 } 826 827 define <16 x float> @test_mask_and_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) { 828 ; X86-LABEL: test_mask_and_ps_rrkz_512: 829 ; X86: # %bb.0: 830 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 831 ; X86-NEXT: vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1] 832 ; X86-NEXT: retl # encoding: [0xc3] 833 ; 834 ; X64-LABEL: test_mask_and_ps_rrkz_512: 835 ; X64: # %bb.0: 836 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 837 ; X64-NEXT: vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1] 838 ; X64-NEXT: retq # encoding: [0xc3] 839 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 840 ret <16 x float> %res 841 } 842 843 define <16 x float> @test_mask_and_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) { 844 ; X86-LABEL: 
test_mask_and_ps_rm_512: 845 ; X86: # %bb.0: 846 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 847 ; X86-NEXT: vandps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x00] 848 ; X86-NEXT: retl # encoding: [0xc3] 849 ; 850 ; X64-LABEL: test_mask_and_ps_rm_512: 851 ; X64: # %bb.0: 852 ; X64-NEXT: vandps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x07] 853 ; X64-NEXT: retq # encoding: [0xc3] 854 %b = load <16 x float>, <16 x float>* %ptr_b 855 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 856 ret <16 x float> %res 857 } 858 859 define <16 x float> @test_mask_and_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) { 860 ; X86-LABEL: test_mask_and_ps_rmk_512: 861 ; X86: # %bb.0: 862 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 863 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 864 ; X86-NEXT: vandps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x08] 865 ; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 866 ; X86-NEXT: retl # encoding: [0xc3] 867 ; 868 ; X64-LABEL: test_mask_and_ps_rmk_512: 869 ; X64: # %bb.0: 870 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 871 ; X64-NEXT: vandps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x0f] 872 ; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 873 ; X64-NEXT: retq # encoding: [0xc3] 874 %b = load <16 x float>, <16 x float>* %ptr_b 875 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 876 ret <16 x float> %res 877 } 878 879 define <16 x float> @test_mask_and_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) { 880 ; X86-LABEL: test_mask_and_ps_rmkz_512: 881 ; X86: # %bb.0: 882 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 883 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 884 ; X86-NEXT: vandps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x00] 885 ; X86-NEXT: retl # encoding: [0xc3] 886 ; 887 ; X64-LABEL: test_mask_and_ps_rmkz_512: 888 ; X64: # %bb.0: 889 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 890 ; X64-NEXT: vandps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x07] 891 ; X64-NEXT: retq # encoding: [0xc3] 892 %b = load <16 x float>, <16 x float>* %ptr_b 893 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 894 ret <16 x float> %res 895 } 896 897 define <16 x float> @test_mask_and_ps_rmb_512(<16 x float> %a, float* %ptr_b) { 898 ; X86-LABEL: test_mask_and_ps_rmb_512: 899 ; X86: # %bb.0: 900 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 901 ; X86-NEXT: vandps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x00] 902 ; X86-NEXT: retl # encoding: [0xc3] 903 ; 904 ; X64-LABEL: test_mask_and_ps_rmb_512: 905 ; X64: # %bb.0: 906 ; X64-NEXT: vandps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x07] 907 ; X64-NEXT: retq # encoding: [0xc3] 908 %q = load float, float* %ptr_b 909 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 910 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 911 %res = call <16 x 
float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 912 ret <16 x float> %res 913 } 914 915 define <16 x float> @test_mask_and_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) { 916 ; X86-LABEL: test_mask_and_ps_rmbk_512: 917 ; X86: # %bb.0: 918 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 919 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 920 ; X86-NEXT: vandps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x08] 921 ; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 922 ; X86-NEXT: retl # encoding: [0xc3] 923 ; 924 ; X64-LABEL: test_mask_and_ps_rmbk_512: 925 ; X64: # %bb.0: 926 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 927 ; X64-NEXT: vandps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x0f] 928 ; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 929 ; X64-NEXT: retq # encoding: [0xc3] 930 %q = load float, float* %ptr_b 931 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 932 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 933 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 934 ret <16 x float> %res 935 } 936 937 define <16 x float> @test_mask_and_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) { 938 ; X86-LABEL: test_mask_and_ps_rmbkz_512: 939 ; X86: # %bb.0: 940 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 941 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 942 ; X86-NEXT: vandps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x00] 943 ; X86-NEXT: retl # encoding: [0xc3] 944 ; 945 ; X64-LABEL: test_mask_and_ps_rmbkz_512: 946 ; X64: # %bb.0: 947 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 948 ; X64-NEXT: vandps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x07] 949 ; X64-NEXT: retq # encoding: [0xc3] 950 %q = load float, float* %ptr_b 951 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 952 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 953 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 954 ret <16 x float> %res 955 } 956 957 declare <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 958 959 define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) { 960 ; CHECK-LABEL: test_mask_or_ps_rr_128: 961 ; CHECK: # %bb.0: 962 ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] 963 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 964 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 965 ret <4 x float> %res 966 } 967 968 define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) { 969 ; X86-LABEL: test_mask_or_ps_rrk_128: 970 ; X86: # %bb.0: 971 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 972 ; X86-NEXT: vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1] 973 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf8,0x28,0xc2] 974 ; X86-NEXT: retl # encoding: [0xc3] 975 ; 976 ; X64-LABEL: test_mask_or_ps_rrk_128: 977 ; X64: # %bb.0: 978 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 979 ; X64-NEXT: vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1] 980 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 981 ; X64-NEXT: retq # encoding: [0xc3] 982 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 983 ret <4 x float> %res 984 } 985 986 define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { 987 ; X86-LABEL: test_mask_or_ps_rrkz_128: 988 ; X86: # %bb.0: 989 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 990 ; X86-NEXT: vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1] 991 ; X86-NEXT: retl # encoding: [0xc3] 992 ; 993 ; X64-LABEL: test_mask_or_ps_rrkz_128: 994 ; X64: # %bb.0: 995 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 996 ; X64-NEXT: vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1] 997 ; X64-NEXT: retq # encoding: [0xc3] 998 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 999 ret <4 x float> %res 1000 } 1001 1002 define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { 1003 ; X86-LABEL: test_mask_or_ps_rm_128: 1004 ; X86: # %bb.0: 1005 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1006 ; X86-NEXT: vorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x00] 1007 ; X86-NEXT: retl # encoding: [0xc3] 1008 ; 1009 ; X64-LABEL: test_mask_or_ps_rm_128: 1010 ; X64: # %bb.0: 1011 ; X64-NEXT: vorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x07] 1012 ; X64-NEXT: retq # encoding: [0xc3] 1013 %b = load <4 x float>, <4 x float>* %ptr_b 1014 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1015 ret <4 x float> %res 1016 } 1017 1018 define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { 1019 ; X86-LABEL: test_mask_or_ps_rmk_128: 1020 ; X86: # %bb.0: 1021 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1022 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1023 ; X86-NEXT: vorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x08] 1024 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1025 ; X86-NEXT: retl # encoding: [0xc3] 1026 ; 1027 ; X64-LABEL: test_mask_or_ps_rmk_128: 1028 ; X64: # %bb.0: 1029 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1030 ; X64-NEXT: vorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f] 1031 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1032 ; X64-NEXT: retq # encoding: [0xc3] 1033 %b = load <4 x float>, <4 x float>* %ptr_b 1034 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1035 ret <4 x float> %res 1036 } 1037 1038 define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { 1039 ; X86-LABEL: test_mask_or_ps_rmkz_128: 1040 ; X86: # %bb.0: 
1041 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1042 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1043 ; X86-NEXT: vorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x00] 1044 ; X86-NEXT: retl # encoding: [0xc3] 1045 ; 1046 ; X64-LABEL: test_mask_or_ps_rmkz_128: 1047 ; X64: # %bb.0: 1048 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1049 ; X64-NEXT: vorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07] 1050 ; X64-NEXT: retq # encoding: [0xc3] 1051 %b = load <4 x float>, <4 x float>* %ptr_b 1052 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1053 ret <4 x float> %res 1054 } 1055 1056 define <4 x float> @test_mask_or_ps_rmb_128(<4 x float> %a, float* %ptr_b) { 1057 ; X86-LABEL: test_mask_or_ps_rmb_128: 1058 ; X86: # %bb.0: 1059 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1060 ; X86-NEXT: vorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x00] 1061 ; X86-NEXT: retl # encoding: [0xc3] 1062 ; 1063 ; X64-LABEL: test_mask_or_ps_rmb_128: 1064 ; X64: # %bb.0: 1065 ; X64-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x07] 1066 ; X64-NEXT: retq # encoding: [0xc3] 1067 %q = load float, float* %ptr_b 1068 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1069 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1070 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1071 ret <4 x float> %res 1072 } 1073 1074 define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { 1075 ; X86-LABEL: test_mask_or_ps_rmbk_128: 1076 ; X86: # %bb.0: 1077 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1078 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1079 ; X86-NEXT: vorps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x08] 1080 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1081 ; X86-NEXT: retl # encoding: [0xc3] 1082 ; 1083 ; X64-LABEL: test_mask_or_ps_rmbk_128: 1084 ; X64: # %bb.0: 1085 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1086 ; X64-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f] 1087 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1088 ; X64-NEXT: retq # encoding: [0xc3] 1089 %q = load float, float* %ptr_b 1090 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1091 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1092 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1093 ret <4 x float> %res 1094 } 1095 1096 define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { 1097 ; X86-LABEL: test_mask_or_ps_rmbkz_128: 1098 ; X86: # %bb.0: 1099 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1100 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1101 ; X86-NEXT: vorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x00] 1102 ; X86-NEXT: retl # encoding: [0xc3] 1103 ; 1104 ; X64-LABEL: 
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%q = load float, float* %ptr_b
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
%res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_or_ps_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
; X86-LABEL: test_mask_or_ps_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rm_256:
; X64: # %bb.0:
; X64-NEXT: vorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <8 x float>, <8 x float>* %ptr_b
%res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x08]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f]
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <8 x float>, <8 x float>* %ptr_b
%res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <8 x float>, <8 x float>* %ptr_b
%res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
; X86-LABEL: test_mask_or_ps_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_ps_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%q = load float, float* %ptr_b
%vecinit.i = insertelement <8 x float> undef, float %q, i32 0
%b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
%res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
ret <8 x float> %res
}

define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_ps_rmbk_256:
; X86: # %bb.0:
1236 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1237 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1238 ; X86-NEXT: vorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x08] 1239 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1240 ; X86-NEXT: retl # encoding: [0xc3] 1241 ; 1242 ; X64-LABEL: test_mask_or_ps_rmbk_256: 1243 ; X64: # %bb.0: 1244 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1245 ; X64-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f] 1246 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1247 ; X64-NEXT: retq # encoding: [0xc3] 1248 %q = load float, float* %ptr_b 1249 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1250 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1251 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1252 ret <8 x float> %res 1253 } 1254 1255 define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { 1256 ; X86-LABEL: test_mask_or_ps_rmbkz_256: 1257 ; X86: # %bb.0: 1258 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1259 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1260 ; X86-NEXT: vorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x00] 1261 ; X86-NEXT: retl # encoding: [0xc3] 1262 ; 1263 ; X64-LABEL: test_mask_or_ps_rmbkz_256: 1264 ; X64: # %bb.0: 1265 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1266 ; X64-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07] 1267 ; X64-NEXT: retq # encoding: [0xc3] 1268 %q = load float, float* %ptr_b 1269 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1270 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1271 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1272 ret <8 x float> %res 1273 } 1274 1275 declare <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1276 1277 define <16 x float> @test_mask_or_ps_rr_512(<16 x float> %a, <16 x float> %b) { 1278 ; CHECK-LABEL: test_mask_or_ps_rr_512: 1279 ; CHECK: # %bb.0: 1280 ; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0xc1] 1281 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1282 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1283 ret <16 x float> %res 1284 } 1285 1286 define <16 x float> @test_mask_or_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) { 1287 ; X86-LABEL: test_mask_or_ps_rrk_512: 1288 ; X86: # %bb.0: 1289 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1290 ; X86-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1] 1291 ; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1292 ; X86-NEXT: retl # encoding: [0xc3] 1293 ; 1294 ; X64-LABEL: test_mask_or_ps_rrk_512: 1295 ; X64: # %bb.0: 1296 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1297 ; X64-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: 
[0x62,0xf1,0x7c,0x49,0x56,0xd1] 1298 ; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1299 ; X64-NEXT: retq # encoding: [0xc3] 1300 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1301 ret <16 x float> %res 1302 } 1303 1304 define <16 x float> @test_mask_or_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) { 1305 ; X86-LABEL: test_mask_or_ps_rrkz_512: 1306 ; X86: # %bb.0: 1307 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1308 ; X86-NEXT: vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1] 1309 ; X86-NEXT: retl # encoding: [0xc3] 1310 ; 1311 ; X64-LABEL: test_mask_or_ps_rrkz_512: 1312 ; X64: # %bb.0: 1313 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1314 ; X64-NEXT: vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1] 1315 ; X64-NEXT: retq # encoding: [0xc3] 1316 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1317 ret <16 x float> %res 1318 } 1319 1320 define <16 x float> @test_mask_or_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) { 1321 ; X86-LABEL: test_mask_or_ps_rm_512: 1322 ; X86: # %bb.0: 1323 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1324 ; X86-NEXT: vorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x00] 1325 ; X86-NEXT: retl # encoding: [0xc3] 1326 ; 1327 ; X64-LABEL: test_mask_or_ps_rm_512: 1328 ; X64: # %bb.0: 1329 ; X64-NEXT: vorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x07] 1330 ; X64-NEXT: retq # encoding: [0xc3] 1331 %b = load <16 x float>, <16 x float>* %ptr_b 1332 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1333 ret <16 x float> %res 1334 } 1335 1336 define <16 x float> @test_mask_or_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) { 1337 ; X86-LABEL: test_mask_or_ps_rmk_512: 1338 ; X86: # %bb.0: 1339 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1340 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1341 ; X86-NEXT: vorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x08] 1342 ; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1343 ; X86-NEXT: retl # encoding: [0xc3] 1344 ; 1345 ; X64-LABEL: test_mask_or_ps_rmk_512: 1346 ; X64: # %bb.0: 1347 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1348 ; X64-NEXT: vorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x0f] 1349 ; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1350 ; X64-NEXT: retq # encoding: [0xc3] 1351 %b = load <16 x float>, <16 x float>* %ptr_b 1352 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1353 ret <16 x float> %res 1354 } 1355 1356 define <16 x float> @test_mask_or_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) { 1357 ; X86-LABEL: test_mask_or_ps_rmkz_512: 1358 ; X86: # %bb.0: 1359 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1360 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1361 ; X86-NEXT: vorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x00] 1362 ; X86-NEXT: 
retl # encoding: [0xc3] 1363 ; 1364 ; X64-LABEL: test_mask_or_ps_rmkz_512: 1365 ; X64: # %bb.0: 1366 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1367 ; X64-NEXT: vorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x07] 1368 ; X64-NEXT: retq # encoding: [0xc3] 1369 %b = load <16 x float>, <16 x float>* %ptr_b 1370 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1371 ret <16 x float> %res 1372 } 1373 1374 define <16 x float> @test_mask_or_ps_rmb_512(<16 x float> %a, float* %ptr_b) { 1375 ; X86-LABEL: test_mask_or_ps_rmb_512: 1376 ; X86: # %bb.0: 1377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1378 ; X86-NEXT: vorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x00] 1379 ; X86-NEXT: retl # encoding: [0xc3] 1380 ; 1381 ; X64-LABEL: test_mask_or_ps_rmb_512: 1382 ; X64: # %bb.0: 1383 ; X64-NEXT: vorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x07] 1384 ; X64-NEXT: retq # encoding: [0xc3] 1385 %q = load float, float* %ptr_b 1386 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1387 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1388 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1389 ret <16 x float> %res 1390 } 1391 1392 define <16 x float> @test_mask_or_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) { 1393 ; X86-LABEL: test_mask_or_ps_rmbk_512: 1394 ; X86: # %bb.0: 1395 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1396 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1397 ; X86-NEXT: vorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x08] 1398 ; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1399 ; X86-NEXT: retl # encoding: [0xc3] 1400 ; 1401 ; X64-LABEL: test_mask_or_ps_rmbk_512: 1402 ; X64: # %bb.0: 1403 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1404 ; X64-NEXT: vorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x0f] 1405 ; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1406 ; X64-NEXT: retq # encoding: [0xc3] 1407 %q = load float, float* %ptr_b 1408 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1409 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1410 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1411 ret <16 x float> %res 1412 } 1413 1414 define <16 x float> @test_mask_or_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) { 1415 ; X86-LABEL: test_mask_or_ps_rmbkz_512: 1416 ; X86: # %bb.0: 1417 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1418 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1419 ; X86-NEXT: vorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x00] 1420 ; X86-NEXT: retl # encoding: [0xc3] 1421 ; 1422 ; X64-LABEL: test_mask_or_ps_rmbkz_512: 1423 ; X64: # %bb.0: 1424 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1425 ; X64-NEXT: vorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x07] 1426 ; X64-NEXT: retq # encoding: [0xc3] 1427 %q = 
load float, float* %ptr_b 1428 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1429 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1430 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1431 ret <16 x float> %res 1432 } 1433 1434 declare <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 1435 1436 define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) { 1437 ; CHECK-LABEL: test_mask_xor_ps_rr_128: 1438 ; CHECK: # %bb.0: 1439 ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] 1440 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1441 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1442 ret <4 x float> %res 1443 } 1444 1445 define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) { 1446 ; X86-LABEL: test_mask_xor_ps_rrk_128: 1447 ; X86: # %bb.0: 1448 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1449 ; X86-NEXT: vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1] 1450 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 1451 ; X86-NEXT: retl # encoding: [0xc3] 1452 ; 1453 ; X64-LABEL: test_mask_xor_ps_rrk_128: 1454 ; X64: # %bb.0: 1455 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1456 ; X64-NEXT: vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1] 1457 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 1458 ; X64-NEXT: retq # encoding: [0xc3] 1459 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1460 ret <4 x float> %res 1461 } 1462 1463 define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { 1464 ; X86-LABEL: test_mask_xor_ps_rrkz_128: 1465 ; X86: # %bb.0: 1466 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1467 ; X86-NEXT: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1] 1468 ; X86-NEXT: retl # encoding: [0xc3] 1469 ; 1470 ; X64-LABEL: test_mask_xor_ps_rrkz_128: 1471 ; X64: # %bb.0: 1472 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1473 ; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1] 1474 ; X64-NEXT: retq # encoding: [0xc3] 1475 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1476 ret <4 x float> %res 1477 } 1478 1479 define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { 1480 ; X86-LABEL: test_mask_xor_ps_rm_128: 1481 ; X86: # %bb.0: 1482 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1483 ; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x00] 1484 ; X86-NEXT: retl # encoding: [0xc3] 1485 ; 1486 ; X64-LABEL: test_mask_xor_ps_rm_128: 1487 ; X64: # %bb.0: 1488 ; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x07] 1489 ; X64-NEXT: retq # encoding: [0xc3] 1490 %b = load <4 x float>, <4 x float>* %ptr_b 1491 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> 
%b, <4 x float> zeroinitializer, i8 -1) 1492 ret <4 x float> %res 1493 } 1494 1495 define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { 1496 ; X86-LABEL: test_mask_xor_ps_rmk_128: 1497 ; X86: # %bb.0: 1498 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1499 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1500 ; X86-NEXT: vxorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x08] 1501 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1502 ; X86-NEXT: retl # encoding: [0xc3] 1503 ; 1504 ; X64-LABEL: test_mask_xor_ps_rmk_128: 1505 ; X64: # %bb.0: 1506 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1507 ; X64-NEXT: vxorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f] 1508 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1509 ; X64-NEXT: retq # encoding: [0xc3] 1510 %b = load <4 x float>, <4 x float>* %ptr_b 1511 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1512 ret <4 x float> %res 1513 } 1514 1515 define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { 1516 ; X86-LABEL: test_mask_xor_ps_rmkz_128: 1517 ; X86: # %bb.0: 1518 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1519 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1520 ; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x00] 1521 ; X86-NEXT: retl # encoding: [0xc3] 1522 ; 1523 ; X64-LABEL: test_mask_xor_ps_rmkz_128: 1524 ; X64: # %bb.0: 1525 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1526 ; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07] 1527 ; X64-NEXT: retq # encoding: [0xc3] 1528 %b = load <4 x float>, <4 x float>* %ptr_b 1529 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1530 ret <4 x float> %res 1531 } 1532 1533 define <4 x float> @test_mask_xor_ps_rmb_128(<4 x float> %a, float* %ptr_b) { 1534 ; X86-LABEL: test_mask_xor_ps_rmb_128: 1535 ; X86: # %bb.0: 1536 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1537 ; X86-NEXT: vxorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x00] 1538 ; X86-NEXT: retl # encoding: [0xc3] 1539 ; 1540 ; X64-LABEL: test_mask_xor_ps_rmb_128: 1541 ; X64: # %bb.0: 1542 ; X64-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x07] 1543 ; X64-NEXT: retq # encoding: [0xc3] 1544 %q = load float, float* %ptr_b 1545 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1546 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1547 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1548 ret <4 x float> %res 1549 } 1550 1551 define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { 1552 ; X86-LABEL: test_mask_xor_ps_rmbk_128: 1553 ; X86: # %bb.0: 1554 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1555 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1556 ; X86-NEXT: vxorps (%eax){1to4}, %xmm0, 
%xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x08] 1557 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1558 ; X86-NEXT: retl # encoding: [0xc3] 1559 ; 1560 ; X64-LABEL: test_mask_xor_ps_rmbk_128: 1561 ; X64: # %bb.0: 1562 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1563 ; X64-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f] 1564 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 1565 ; X64-NEXT: retq # encoding: [0xc3] 1566 %q = load float, float* %ptr_b 1567 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1568 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1569 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1570 ret <4 x float> %res 1571 } 1572 1573 define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { 1574 ; X86-LABEL: test_mask_xor_ps_rmbkz_128: 1575 ; X86: # %bb.0: 1576 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1577 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1578 ; X86-NEXT: vxorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x00] 1579 ; X86-NEXT: retl # encoding: [0xc3] 1580 ; 1581 ; X64-LABEL: test_mask_xor_ps_rmbkz_128: 1582 ; X64: # %bb.0: 1583 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1584 ; X64-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x07] 1585 ; X64-NEXT: retq # encoding: [0xc3] 1586 %q = load float, float* %ptr_b 1587 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1588 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1589 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1590 ret <4 x float> %res 1591 } 1592 1593 declare <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1594 1595 define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) { 1596 ; CHECK-LABEL: test_mask_xor_ps_rr_256: 1597 ; CHECK: # %bb.0: 1598 ; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0xc1] 1599 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1600 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1601 ret <8 x float> %res 1602 } 1603 1604 define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { 1605 ; X86-LABEL: test_mask_xor_ps_rrk_256: 1606 ; X86: # %bb.0: 1607 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1608 ; X86-NEXT: vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1] 1609 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 1610 ; X86-NEXT: retl # encoding: [0xc3] 1611 ; 1612 ; X64-LABEL: test_mask_xor_ps_rrk_256: 1613 ; X64: # %bb.0: 1614 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1615 ; X64-NEXT: vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1] 1616 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 1617 ; X64-NEXT: retq # encoding: [0xc3] 1618 %res = call <8 x float> 
@llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1619 ret <8 x float> %res 1620 } 1621 1622 define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { 1623 ; X86-LABEL: test_mask_xor_ps_rrkz_256: 1624 ; X86: # %bb.0: 1625 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1626 ; X86-NEXT: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1] 1627 ; X86-NEXT: retl # encoding: [0xc3] 1628 ; 1629 ; X64-LABEL: test_mask_xor_ps_rrkz_256: 1630 ; X64: # %bb.0: 1631 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1632 ; X64-NEXT: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1] 1633 ; X64-NEXT: retq # encoding: [0xc3] 1634 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1635 ret <8 x float> %res 1636 } 1637 1638 define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { 1639 ; X86-LABEL: test_mask_xor_ps_rm_256: 1640 ; X86: # %bb.0: 1641 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1642 ; X86-NEXT: vxorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x00] 1643 ; X86-NEXT: retl # encoding: [0xc3] 1644 ; 1645 ; X64-LABEL: test_mask_xor_ps_rm_256: 1646 ; X64: # %bb.0: 1647 ; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x07] 1648 ; X64-NEXT: retq # encoding: [0xc3] 1649 %b = load <8 x float>, <8 x float>* %ptr_b 1650 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1651 ret <8 x float> %res 1652 } 1653 1654 define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { 1655 ; X86-LABEL: test_mask_xor_ps_rmk_256: 1656 ; X86: # %bb.0: 1657 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1658 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1659 ; X86-NEXT: vxorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x08] 1660 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1661 ; X86-NEXT: retl # encoding: [0xc3] 1662 ; 1663 ; X64-LABEL: test_mask_xor_ps_rmk_256: 1664 ; X64: # %bb.0: 1665 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1666 ; X64-NEXT: vxorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f] 1667 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1668 ; X64-NEXT: retq # encoding: [0xc3] 1669 %b = load <8 x float>, <8 x float>* %ptr_b 1670 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1671 ret <8 x float> %res 1672 } 1673 1674 define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { 1675 ; X86-LABEL: test_mask_xor_ps_rmkz_256: 1676 ; X86: # %bb.0: 1677 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1678 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1679 ; X86-NEXT: vxorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x00] 1680 ; X86-NEXT: retl # encoding: [0xc3] 1681 ; 1682 ; X64-LABEL: test_mask_xor_ps_rmkz_256: 1683 ; X64: # %bb.0: 1684 ; X64-NEXT: kmovw %esi, %k1 # encoding: 
[0xc5,0xf8,0x92,0xce] 1685 ; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07] 1686 ; X64-NEXT: retq # encoding: [0xc3] 1687 %b = load <8 x float>, <8 x float>* %ptr_b 1688 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1689 ret <8 x float> %res 1690 } 1691 1692 define <8 x float> @test_mask_xor_ps_rmb_256(<8 x float> %a, float* %ptr_b) { 1693 ; X86-LABEL: test_mask_xor_ps_rmb_256: 1694 ; X86: # %bb.0: 1695 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1696 ; X86-NEXT: vxorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x00] 1697 ; X86-NEXT: retl # encoding: [0xc3] 1698 ; 1699 ; X64-LABEL: test_mask_xor_ps_rmb_256: 1700 ; X64: # %bb.0: 1701 ; X64-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x07] 1702 ; X64-NEXT: retq # encoding: [0xc3] 1703 %q = load float, float* %ptr_b 1704 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1705 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1706 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1707 ret <8 x float> %res 1708 } 1709 1710 define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { 1711 ; X86-LABEL: test_mask_xor_ps_rmbk_256: 1712 ; X86: # %bb.0: 1713 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1714 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1715 ; X86-NEXT: vxorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x08] 1716 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1717 ; X86-NEXT: retl # encoding: [0xc3] 1718 ; 1719 ; X64-LABEL: test_mask_xor_ps_rmbk_256: 1720 ; X64: # %bb.0: 1721 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1722 ; X64-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f] 1723 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] 1724 ; X64-NEXT: retq # encoding: [0xc3] 1725 %q = load float, float* %ptr_b 1726 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1727 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1728 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1729 ret <8 x float> %res 1730 } 1731 1732 define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { 1733 ; X86-LABEL: test_mask_xor_ps_rmbkz_256: 1734 ; X86: # %bb.0: 1735 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1736 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1737 ; X86-NEXT: vxorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x00] 1738 ; X86-NEXT: retl # encoding: [0xc3] 1739 ; 1740 ; X64-LABEL: test_mask_xor_ps_rmbkz_256: 1741 ; X64: # %bb.0: 1742 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1743 ; X64-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07] 1744 ; X64-NEXT: retq # encoding: [0xc3] 1745 %q = load float, float* %ptr_b 1746 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1747 %b = shufflevector <8 x float> 
%vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1748 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1749 ret <8 x float> %res 1750 } 1751 1752 declare <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1753 1754 define <16 x float> @test_mask_xor_ps_rr_512(<16 x float> %a, <16 x float> %b) { 1755 ; CHECK-LABEL: test_mask_xor_ps_rr_512: 1756 ; CHECK: # %bb.0: 1757 ; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0xc1] 1758 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1759 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1760 ret <16 x float> %res 1761 } 1762 1763 define <16 x float> @test_mask_xor_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) { 1764 ; X86-LABEL: test_mask_xor_ps_rrk_512: 1765 ; X86: # %bb.0: 1766 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1767 ; X86-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1] 1768 ; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1769 ; X86-NEXT: retl # encoding: [0xc3] 1770 ; 1771 ; X64-LABEL: test_mask_xor_ps_rrk_512: 1772 ; X64: # %bb.0: 1773 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1774 ; X64-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1] 1775 ; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1776 ; X64-NEXT: retq # encoding: [0xc3] 1777 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1778 ret <16 x float> %res 1779 } 1780 1781 define <16 x float> @test_mask_xor_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) { 1782 ; X86-LABEL: test_mask_xor_ps_rrkz_512: 1783 ; X86: # %bb.0: 1784 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1785 ; X86-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1] 1786 ; X86-NEXT: retl # encoding: [0xc3] 1787 ; 1788 ; X64-LABEL: test_mask_xor_ps_rrkz_512: 1789 ; X64: # %bb.0: 1790 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1791 ; X64-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1] 1792 ; X64-NEXT: retq # encoding: [0xc3] 1793 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1794 ret <16 x float> %res 1795 } 1796 1797 define <16 x float> @test_mask_xor_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) { 1798 ; X86-LABEL: test_mask_xor_ps_rm_512: 1799 ; X86: # %bb.0: 1800 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1801 ; X86-NEXT: vxorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x00] 1802 ; X86-NEXT: retl # encoding: [0xc3] 1803 ; 1804 ; X64-LABEL: test_mask_xor_ps_rm_512: 1805 ; X64: # %bb.0: 1806 ; X64-NEXT: vxorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x07] 1807 ; X64-NEXT: retq # encoding: [0xc3] 1808 %b = load <16 x float>, <16 x float>* %ptr_b 1809 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1810 ret <16 x float> %res 1811 } 1812 1813 define <16 x float> @test_mask_xor_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 
x float> %passThru, i16 %mask) { 1814 ; X86-LABEL: test_mask_xor_ps_rmk_512: 1815 ; X86: # %bb.0: 1816 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1817 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1818 ; X86-NEXT: vxorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x08] 1819 ; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1820 ; X86-NEXT: retl # encoding: [0xc3] 1821 ; 1822 ; X64-LABEL: test_mask_xor_ps_rmk_512: 1823 ; X64: # %bb.0: 1824 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1825 ; X64-NEXT: vxorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x0f] 1826 ; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1827 ; X64-NEXT: retq # encoding: [0xc3] 1828 %b = load <16 x float>, <16 x float>* %ptr_b 1829 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1830 ret <16 x float> %res 1831 } 1832 1833 define <16 x float> @test_mask_xor_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) { 1834 ; X86-LABEL: test_mask_xor_ps_rmkz_512: 1835 ; X86: # %bb.0: 1836 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1837 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1838 ; X86-NEXT: vxorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x00] 1839 ; X86-NEXT: retl # encoding: [0xc3] 1840 ; 1841 ; X64-LABEL: test_mask_xor_ps_rmkz_512: 1842 ; X64: # %bb.0: 1843 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1844 ; X64-NEXT: vxorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x07] 1845 ; X64-NEXT: retq # encoding: [0xc3] 1846 %b = load <16 x float>, <16 x float>* %ptr_b 1847 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1848 ret <16 x float> %res 1849 } 1850 1851 define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) { 1852 ; X86-LABEL: test_mask_xor_ps_rmb_512: 1853 ; X86: # %bb.0: 1854 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1855 ; X86-NEXT: vxorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x00] 1856 ; X86-NEXT: retl # encoding: [0xc3] 1857 ; 1858 ; X64-LABEL: test_mask_xor_ps_rmb_512: 1859 ; X64: # %bb.0: 1860 ; X64-NEXT: vxorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x07] 1861 ; X64-NEXT: retq # encoding: [0xc3] 1862 %q = load float, float* %ptr_b 1863 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1864 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1865 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1866 ret <16 x float> %res 1867 } 1868 1869 define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) { 1870 ; X86-LABEL: test_mask_xor_ps_rmbk_512: 1871 ; X86: # %bb.0: 1872 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1873 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1874 ; X86-NEXT: vxorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x08] 1875 ; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1876 ; X86-NEXT: 
retl # encoding: [0xc3] 1877 ; 1878 ; X64-LABEL: test_mask_xor_ps_rmbk_512: 1879 ; X64: # %bb.0: 1880 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1881 ; X64-NEXT: vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f] 1882 ; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1883 ; X64-NEXT: retq # encoding: [0xc3] 1884 %q = load float, float* %ptr_b 1885 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1886 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1887 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1888 ret <16 x float> %res 1889 } 1890 1891 define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) { 1892 ; X86-LABEL: test_mask_xor_ps_rmbkz_512: 1893 ; X86: # %bb.0: 1894 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1895 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1896 ; X86-NEXT: vxorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x00] 1897 ; X86-NEXT: retl # encoding: [0xc3] 1898 ; 1899 ; X64-LABEL: test_mask_xor_ps_rmbkz_512: 1900 ; X64: # %bb.0: 1901 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1902 ; X64-NEXT: vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07] 1903 ; X64-NEXT: retq # encoding: [0xc3] 1904 %q = load float, float* %ptr_b 1905 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1906 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1907 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1908 ret <16 x float> %res 1909 } 1910 1911 declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 1912 1913 define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) { 1914 ; CHECK-LABEL: test_mask_mullo_epi64_rr_512: 1915 ; CHECK: # %bb.0: 1916 ; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1] 1917 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1918 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1919 ret <8 x i64> %res 1920 } 1921 1922 define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1923 ; X86-LABEL: test_mask_mullo_epi64_rrk_512: 1924 ; X86: # %bb.0: 1925 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1926 ; X86-NEXT: vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1] 1927 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1928 ; X86-NEXT: retl # encoding: [0xc3] 1929 ; 1930 ; X64-LABEL: test_mask_mullo_epi64_rrk_512: 1931 ; X64: # %bb.0: 1932 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1933 ; X64-NEXT: vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1] 1934 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1935 ; X64-NEXT: retq # encoding: [0xc3] 1936 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1937 ret <8 x i64> %res 1938 } 1939 1940 define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, 
<8 x i64> %b, i8 %mask) { 1941 ; X86-LABEL: test_mask_mullo_epi64_rrkz_512: 1942 ; X86: # %bb.0: 1943 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 1944 ; X86-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1] 1945 ; X86-NEXT: retl # encoding: [0xc3] 1946 ; 1947 ; X64-LABEL: test_mask_mullo_epi64_rrkz_512: 1948 ; X64: # %bb.0: 1949 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 1950 ; X64-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1] 1951 ; X64-NEXT: retq # encoding: [0xc3] 1952 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 1953 ret <8 x i64> %res 1954 } 1955 1956 define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) { 1957 ; X86-LABEL: test_mask_mullo_epi64_rm_512: 1958 ; X86: # %bb.0: 1959 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1960 ; X86-NEXT: vpmullq (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x00] 1961 ; X86-NEXT: retl # encoding: [0xc3] 1962 ; 1963 ; X64-LABEL: test_mask_mullo_epi64_rm_512: 1964 ; X64: # %bb.0: 1965 ; X64-NEXT: vpmullq (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07] 1966 ; X64-NEXT: retq # encoding: [0xc3] 1967 %b = load <8 x i64>, <8 x i64>* %ptr_b 1968 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1969 ret <8 x i64> %res 1970 } 1971 1972 define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 1973 ; X86-LABEL: test_mask_mullo_epi64_rmk_512: 1974 ; X86: # %bb.0: 1975 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1976 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1977 ; X86-NEXT: vpmullq (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x08] 1978 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1979 ; X86-NEXT: retl # encoding: [0xc3] 1980 ; 1981 ; X64-LABEL: test_mask_mullo_epi64_rmk_512: 1982 ; X64: # %bb.0: 1983 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 1984 ; X64-NEXT: vpmullq (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f] 1985 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1986 ; X64-NEXT: retq # encoding: [0xc3] 1987 %b = load <8 x i64>, <8 x i64>* %ptr_b 1988 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1989 ret <8 x i64> %res 1990 } 1991 1992 define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) { 1993 ; X86-LABEL: test_mask_mullo_epi64_rmkz_512: 1994 ; X86: # %bb.0: 1995 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1996 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 1997 ; X86-NEXT: vpmullq (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x00] 1998 ; X86-NEXT: retl # encoding: [0xc3] 1999 ; 2000 ; X64-LABEL: test_mask_mullo_epi64_rmkz_512: 2001 ; X64: # %bb.0: 2002 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2003 ; X64-NEXT: vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07] 2004 ; X64-NEXT: retq # encoding: [0xc3] 2005 %b = load <8 x i64>, <8 x i64>* %ptr_b 2006 %res = call <8 x i64> 
@llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2007 ret <8 x i64> %res 2008 } 2009 2010 define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) { 2011 ; X86-LABEL: test_mask_mullo_epi64_rmb_512: 2012 ; X86: # %bb.0: 2013 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2014 ; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 2015 ; X86-NEXT: # xmm1 = mem[0],zero 2016 ; X86-NEXT: vpbroadcastq %xmm1, %zmm1 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9] 2017 ; X86-NEXT: vpmullq %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1] 2018 ; X86-NEXT: retl # encoding: [0xc3] 2019 ; 2020 ; X64-LABEL: test_mask_mullo_epi64_rmb_512: 2021 ; X64: # %bb.0: 2022 ; X64-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07] 2023 ; X64-NEXT: retq # encoding: [0xc3] 2024 %q = load i64, i64* %ptr_b 2025 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2026 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2027 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2028 ret <8 x i64> %res 2029 } 2030 2031 define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2032 ; X86-LABEL: test_mask_mullo_epi64_rmbk_512: 2033 ; X86: # %bb.0: 2034 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2035 ; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10] 2036 ; X86-NEXT: # xmm2 = mem[0],zero 2037 ; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2] 2038 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 2039 ; X86-NEXT: vpmullq %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xca] 2040 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2041 ; X86-NEXT: retl # encoding: [0xc3] 2042 ; 2043 ; X64-LABEL: test_mask_mullo_epi64_rmbk_512: 2044 ; X64: # %bb.0: 2045 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2046 ; X64-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f] 2047 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2048 ; X64-NEXT: retq # encoding: [0xc3] 2049 %q = load i64, i64* %ptr_b 2050 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2051 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2052 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2053 ret <8 x i64> %res 2054 } 2055 2056 define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) { 2057 ; X86-LABEL: test_mask_mullo_epi64_rmbkz_512: 2058 ; X86: # %bb.0: 2059 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2060 ; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 2061 ; X86-NEXT: # xmm1 = mem[0],zero 2062 ; X86-NEXT: vpbroadcastq %xmm1, %zmm1 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9] 2063 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 2064 ; X86-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1] 2065 ; X86-NEXT: retl # encoding: [0xc3] 2066 ; 2067 ; X64-LABEL: test_mask_mullo_epi64_rmbkz_512: 2068 ; X64: # %bb.0: 
2069 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2070 ; X64-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07] 2071 ; X64-NEXT: retq # encoding: [0xc3] 2072 %q = load i64, i64* %ptr_b 2073 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2074 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2075 %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2076 ret <8 x i64> %res 2077 } 2078 declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2079 2080 define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) { 2081 ; CHECK-LABEL: test_mask_mullo_epi64_rr_256: 2082 ; CHECK: # %bb.0: 2083 ; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1] 2084 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2085 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2086 ret <4 x i64> %res 2087 } 2088 2089 define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) { 2090 ; X86-LABEL: test_mask_mullo_epi64_rrk_256: 2091 ; X86: # %bb.0: 2092 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 2093 ; X86-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1] 2094 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2095 ; X86-NEXT: retl # encoding: [0xc3] 2096 ; 2097 ; X64-LABEL: test_mask_mullo_epi64_rrk_256: 2098 ; X64: # %bb.0: 2099 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2100 ; X64-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1] 2101 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2102 ; X64-NEXT: retq # encoding: [0xc3] 2103 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2104 ret <4 x i64> %res 2105 } 2106 2107 define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 2108 ; X86-LABEL: test_mask_mullo_epi64_rrkz_256: 2109 ; X86: # %bb.0: 2110 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] 2111 ; X86-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1] 2112 ; X86-NEXT: retl # encoding: [0xc3] 2113 ; 2114 ; X64-LABEL: test_mask_mullo_epi64_rrkz_256: 2115 ; X64: # %bb.0: 2116 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 2117 ; X64-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1] 2118 ; X64-NEXT: retq # encoding: [0xc3] 2119 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 2120 ret <4 x i64> %res 2121 } 2122 2123 define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) { 2124 ; X86-LABEL: test_mask_mullo_epi64_rm_256: 2125 ; X86: # %bb.0: 2126 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2127 ; X86-NEXT: vpmullq (%eax), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x00] 2128 ; X86-NEXT: retl # encoding: [0xc3] 2129 ; 2130 ; X64-LABEL: test_mask_mullo_epi64_rm_256: 2131 ; X64: # %bb.0: 2132 ; X64-NEXT: vpmullq (%rdi), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07] 2133 ; X64-NEXT: retq # encoding: 
[0xc3] 2134 %b = load <4 x i64>, <4 x i64>* %ptr_b 2135 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2136 ret <4 x i64> %res 2137 } 2138 2139 define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) { 2140 ; X86-LABEL: test_mask_mullo_epi64_rmk_256: 2141 ; X86: # %bb.0: 2142 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2143 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 2144 ; X86-NEXT: vpmullq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x08] 2145 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2146 ; X86-NEXT: retl # encoding: [0xc3] 2147 ; 2148 ; X64-LABEL: test_mask_mullo_epi64_rmk_256: 2149 ; X64: # %bb.0: 2150 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2151 ; X64-NEXT: vpmullq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f] 2152 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2153 ; X64-NEXT: retq # encoding: [0xc3] 2154 %b = load <4 x i64>, <4 x i64>* %ptr_b 2155 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2156 ret <4 x i64> %res 2157 } 2158 2159 define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) { 2160 ; X86-LABEL: test_mask_mullo_epi64_rmkz_256: 2161 ; X86: # %bb.0: 2162 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2163 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] 2164 ; X86-NEXT: vpmullq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x00] 2165 ; X86-NEXT: retl # encoding: [0xc3] 2166 ; 2167 ; X64-LABEL: test_mask_mullo_epi64_rmkz_256: 2168 ; X64: # %bb.0: 2169 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 2170 ; X64-NEXT: vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07] 2171 ; X64-NEXT: retq # encoding: [0xc3] 2172 %b = load <4 x i64>, <4 x i64>* %ptr_b 2173 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 2174 ret <4 x i64> %res 2175 } 2176 2177 define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) { 2178 ; X86-LABEL: test_mask_mullo_epi64_rmb_256: 2179 ; X86: # %bb.0: 2180 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2181 ; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 2182 ; X86-NEXT: # xmm1 = mem[0],zero 2183 ; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9] 2184 ; X86-NEXT: vpmullq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1] 2185 ; X86-NEXT: retl # encoding: [0xc3] 2186 ; 2187 ; X64-LABEL: test_mask_mullo_epi64_rmb_256: 2188 ; X64: # %bb.0: 2189 ; X64-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07] 2190 ; X64-NEXT: retq # encoding: [0xc3] 2191 %q = load i64, i64* %ptr_b 2192 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 2193 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 2194 %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2195 ret <4 x i64> %res 2196 } 2197 2198 define <4 x i64> 
define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT: # xmm2 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xca]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; X86-LABEL: test_mask_mullo_epi64_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmullq (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpmullq (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <2 x i64>, <2 x i64>* %ptr_b
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <2 x i64>, <2 x i64>* %ptr_b
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <2 x i64>, <2 x i64>* %ptr_b
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mullo_epi64_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
; X86-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpbroadcastq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x10]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xca]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
ret <2 x i64> %res
}
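
; The remaining vpmullq test below (rmbkz_128) exercises the broadcast-from-memory form:
; a scalar i64 is loaded, splatted via shufflevector, and multiplied under a zeroing mask ({z}).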

define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi64_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi64_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
; X86: # %bb.0:
; X86-NEXT: vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X86-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
; X64: # %bb.0:
; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X64-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
%res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
%res3 = fadd <2 x double> %res, %res1
%res4 = fadd <2 x double> %res3, %res2
ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
; X86: # %bb.0:
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
; X86-NEXT: vaddpd %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xd3]
; X86-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
; X86-NEXT: vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
; X64: # %bb.0:
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
; X64-NEXT: vaddpd %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xd3]
; X64-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
; X64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
%res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
%res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
%res3 = fadd <4 x double> %res, %res1
%res4 = fadd <4 x double> %res2, %res3
ret <4 x double> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
; X86: # %bb.0:
; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
; X86-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
; X64: # %bb.0:
; X64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
; X64-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
%res3 = add <4 x i64> %res, %res1
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8)

define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2d_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8)

define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2d_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT: vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8)

define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2q_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8)

define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2q_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT: vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
ret <4 x i64> %res
}

declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
; X86-NEXT: vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X86-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
; X64-NEXT: vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X64-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]

%res1 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1)
%res2 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
%res3 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> zeroinitializer, i8 %mask)
%res4 = fadd <4 x double> %res1, %res2
%res5 = fadd <4 x double> %res3, %res4
ret <4 x double> %res5
}

define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256_load(<2 x double>* %x0ptr, <4 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vbroadcastf64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x00]
; X86-NEXT: # ymm0 {%k1} = mem[0,1,0,1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcastf64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x07]
; X64-NEXT: # ymm0 {%k1} = mem[0,1,0,1]
; X64-NEXT: retq # encoding: [0xc3]

%x0 = load <2 x double>, <2 x double>* %x0ptr
%res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
ret <4 x double> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
; X86-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
; X64-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]

%res1 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
%res3 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
%res4 = add <4 x i64> %res1, %res2
%res5 = add <4 x i64> %res3, %res4
ret <4 x i64> %res5
}

define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256_load(<2 x i64>* %x0ptr, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vbroadcasti64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x00]
; X86-NEXT: # ymm0 {%k1} = mem[0,1,0,1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcasti64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x07]
; X64-NEXT: # ymm0 {%k1} = mem[0,1,0,1]
; X64-NEXT: retq # encoding: [0xc3]

%x0 = load <2 x i64>, <2 x i64>* %x0ptr
%res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
ret <4 x i64> %res
}

declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
; X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3)
%res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res3, %res2
ret <8 x float> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3, i64 * %y_ptr) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
; X86: # %bb.0:
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT: # xmm2 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti32x4 $1, %xmm2, %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf3,0x6d,0x29,0x38,0xca,0x01]
; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
; X86-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
; X64: # %bb.0:
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vmovq (%rsi), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x16]
; X64-NEXT: # xmm2 = mem[0],zero
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti32x4 $1, %xmm2, %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf3,0x6d,0x29,0x38,0xca,0x01]
; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%y_64 = load i64, i64 * %y_ptr
%y_v2i64 = insertelement <2 x i64> undef, i64 %y_64, i32 0
%y = bitcast <2 x i64> %y_v2i64 to <4 x i32>
%res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %y, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X86-NEXT: vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X64-NEXT: vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 -1)
%res3 = add <4 x i32> %res, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)

define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovd2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)

define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovd2m %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)

define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovq2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)

define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovq2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
ret i8 %res
}

declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
; X86: # %bb.0:
; X86-NEXT: vcvtqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xd0]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
; X64: # %bb.0:
; X64-NEXT: vcvtqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
; X86: # %bb.0:
; X86-NEXT: vcvtqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xd0]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
; X64: # %bb.0:
; X64-NEXT: vcvtqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
%res1 = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
; X86: # %bb.0:
; X86-NEXT: vcvtuqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xd0]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
; X64: # %bb.0:
; X64-NEXT: vcvtuqq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
; X86: # %bb.0:
; X86-NEXT: vcvtuqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xd0]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
; X64: # %bb.0:
; X64-NEXT: vcvtuqq2pd %ymm0, %ymm2 # encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
%res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res)
ret i8 %res1
}

declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res)
ret i8 %res1
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res)
ret i8 %res1
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res)
ret i8 %res1
}