; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Lowering tests for AVX-512 (KNL) intrinsics: mask-register operations
; (kortest, kand, knot, kunpck) and packed reciprocal / round-scale ops.
; This chunk was reconstructed from a paste that had fused display line
; numbers into the text and lost all newlines.

declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
define i32 @test_kortestz(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kortestz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kortestw %k0, %k1
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    kmovw %eax, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
define i32 @test_kortestc(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kortestc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kortestw %k0, %k1
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
  ret i32 %res
}

declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
define i16 @test_kand(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kand:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movw $8, %ax
; CHECK-NEXT:    kmovw %eax, %k0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kandw %k0, %k1, %k0
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    kandw %k1, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
define i16 @test_knot(i16 %a0) {
; CHECK-LABEL: test_knot:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; CHECK-LABEL: unpckbw_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    kunpckbw %k1, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}

define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrcp14ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_rcp_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrcp14pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone

declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test7(<8 x double> %a) {
; CHECK-LABEL: test7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
  ret <8 x double>%res
}

declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test8(<16 x float> %a) {
; CHECK-LABEL: test8:
; CHECK:       ## BB#0:
CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0 114 ; CHECK-NEXT: retq 115 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4) 116 ret <16 x float>%res 117 } 118 119 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) { 120 ; CHECK-LABEL: test_rsqrt_ps_512: 121 ; CHECK: ## BB#0: 122 ; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0 123 ; CHECK-NEXT: retq 124 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] 125 ret <16 x float> %res 126 } 127 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone 128 129 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { 130 ; CHECK-LABEL: test_rsqrt14_ss: 131 ; CHECK: ## BB#0: 132 ; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0 133 ; CHECK-NEXT: retq 134 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] 135 ret <4 x float> %res 136 } 137 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 138 139 define <4 x float> @test_rcp14_ss(<4 x float> %a0) { 140 ; CHECK-LABEL: test_rcp14_ss: 141 ; CHECK: ## BB#0: 142 ; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 143 ; CHECK-NEXT: retq 144 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] 145 ret <4 x float> %res 146 } 147 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 148 149 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { 150 ; CHECK-LABEL: test_sqrt_pd_512: 151 ; CHECK: ## BB#0: 152 ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 153 ; CHECK-NEXT: retq 154 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) 155 ret <8 x double> 
%res 156 } 157 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone 158 159 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { 160 ; CHECK-LABEL: test_sqrt_ps_512: 161 ; CHECK: ## BB#0: 162 ; CHECK-NEXT: vsqrtps %zmm0, %zmm0 163 ; CHECK-NEXT: retq 164 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) 165 ret <16 x float> %res 166 } 167 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) { 168 ; CHECK-LABEL: test_sqrt_round_ps_512: 169 ; CHECK: ## BB#0: 170 ; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 171 ; CHECK-NEXT: retq 172 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3) 173 ret <16 x float> %res 174 } 175 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone 176 177 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) { 178 ; CHECK-LABEL: test_getexp_pd_512: 179 ; CHECK: ## BB#0: 180 ; CHECK-NEXT: vgetexppd %zmm0, %zmm0 181 ; CHECK-NEXT: retq 182 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) 183 ret <8 x double> %res 184 } 185 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) { 186 ; CHECK-LABEL: test_getexp_round_pd_512: 187 ; CHECK: ## BB#0: 188 ; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0 189 ; CHECK-NEXT: retq 190 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) 191 ret <8 x double> %res 192 } 193 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone 194 195 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) { 196 ; CHECK-LABEL: test_getexp_ps_512: 197 ; CHECK: ## BB#0: 198 ; CHECK-NEXT: vgetexpps %zmm0, %zmm0 199 ; CHECK-NEXT: retq 200 %res = call <16 x 
float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) 201 ret <16 x float> %res 202 } 203 204 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) { 205 ; CHECK-LABEL: test_getexp_round_ps_512: 206 ; CHECK: ## BB#0: 207 ; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0 208 ; CHECK-NEXT: retq 209 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) 210 ret <16 x float> %res 211 } 212 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone 213 214 declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 215 216 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 217 ; CHECK-LABEL: test_sqrt_ss: 218 ; CHECK: ## BB#0: 219 ; CHECK-NEXT: andl $1, %edi 220 ; CHECK-NEXT: kmovw %edi, %k1 221 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 222 ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1} 223 ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} 224 ; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} 225 ; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0 226 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1 227 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 228 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 229 ; CHECK-NEXT: retq 230 %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 231 %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1) 232 %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2) 233 %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3) 234 235 %res.1 = fadd <4 x float> %res0, %res1 236 %res.2 = fadd 
<4 x float> %res2, %res3 237 %res = fadd <4 x float> %res.1, %res.2 238 ret <4 x float> %res 239 } 240 241 declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 242 243 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 244 ; CHECK-LABEL: test_sqrt_sd: 245 ; CHECK: ## BB#0: 246 ; CHECK-NEXT: andl $1, %edi 247 ; CHECK-NEXT: kmovw %edi, %k1 248 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 249 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1} 250 ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} 251 ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} 252 ; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0 253 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1 254 ; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0 255 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 256 ; CHECK-NEXT: retq 257 %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 258 %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1) 259 %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2) 260 %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3) 261 262 %res.1 = fadd <2 x double> %res0, %res1 263 %res.2 = fadd <2 x double> %res2, %res3 264 %res = fadd <2 x double> %res.1, %res.2 265 ret <2 x double> %res 266 } 267 268 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { 269 ; CHECK-LABEL: test_x86_sse2_cvtsd2si64: 270 ; CHECK: ## BB#0: 271 ; CHECK-NEXT: vcvtsd2si %xmm0, %rax 272 ; CHECK-NEXT: retq 273 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] 274 ret i64 %res 275 } 276 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone 277 278 define <2 x double> 
@test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { 279 ; CHECK-LABEL: test_x86_sse2_cvtsi642sd: 280 ; CHECK: ## BB#0: 281 ; CHECK-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 282 ; CHECK-NEXT: retq 283 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] 284 ret <2 x double> %res 285 } 286 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone 287 288 define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) { 289 ; CHECK-LABEL: test_x86_avx512_cvttsd2si64: 290 ; CHECK: ## BB#0: 291 ; CHECK-NEXT: vcvttsd2si %xmm0, %rcx 292 ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %rax 293 ; CHECK-NEXT: addq %rcx, %rax 294 ; CHECK-NEXT: retq 295 %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ; 296 %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ; 297 %res2 = add i64 %res0, %res1 298 ret i64 %res2 299 } 300 declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone 301 302 define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) { 303 ; CHECK-LABEL: test_x86_avx512_cvttsd2usi: 304 ; CHECK: ## BB#0: 305 ; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx 306 ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %eax 307 ; CHECK-NEXT: addl %ecx, %eax 308 ; CHECK-NEXT: retq 309 %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ; 310 %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ; 311 %res2 = add i32 %res0, %res1 312 ret i32 %res2 313 } 314 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone 315 316 define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) { 317 ; CHECK-LABEL: test_x86_avx512_cvttsd2si: 318 ; CHECK: ## BB#0: 319 ; CHECK-NEXT: vcvttsd2si %xmm0, %ecx 320 ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %eax 321 ; CHECK-NEXT: addl %ecx, %eax 322 ; CHECK-NEXT: retq 323 %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ; 324 %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 
8) ; 325 %res2 = add i32 %res0, %res1 326 ret i32 %res2 327 } 328 declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone 329 330 331 332 define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) { 333 ; CHECK-LABEL: test_x86_avx512_cvttsd2usi64: 334 ; CHECK: ## BB#0: 335 ; CHECK-NEXT: vcvttsd2usi %xmm0, %rcx 336 ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %rax 337 ; CHECK-NEXT: addq %rcx, %rax 338 ; CHECK-NEXT: retq 339 %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ; 340 %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ; 341 %res2 = add i64 %res0, %res1 342 ret i64 %res2 343 } 344 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone 345 346 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { 347 ; CHECK-LABEL: test_x86_sse_cvtss2si64: 348 ; CHECK: ## BB#0: 349 ; CHECK-NEXT: vcvtss2si %xmm0, %rax 350 ; CHECK-NEXT: retq 351 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] 352 ret i64 %res 353 } 354 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 355 356 357 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { 358 ; CHECK-LABEL: test_x86_sse_cvtsi642ss: 359 ; CHECK: ## BB#0: 360 ; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 361 ; CHECK-NEXT: retq 362 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] 363 ret <4 x float> %res 364 } 365 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone 366 367 368 define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) { 369 ; CHECK-LABEL: test_x86_avx512_cvttss2si: 370 ; CHECK: ## BB#0: 371 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx 372 ; CHECK-NEXT: vcvttss2si %xmm0, %eax 373 ; CHECK-NEXT: addl %ecx, %eax 374 ; CHECK-NEXT: retq 375 %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ; 376 %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ; 377 %res2 = add i32 
%res0, %res1 378 ret i32 %res2 379 } 380 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone 381 382 define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) { 383 ; CHECK-LABEL: test_x86_avx512_cvttss2si64: 384 ; CHECK: ## BB#0: 385 ; CHECK-NEXT: vcvttss2si %xmm0, %rcx 386 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax 387 ; CHECK-NEXT: addq %rcx, %rax 388 ; CHECK-NEXT: retq 389 %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ; 390 %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ; 391 %res2 = add i64 %res0, %res1 392 ret i64 %res2 393 } 394 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone 395 396 define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) { 397 ; CHECK-LABEL: test_x86_avx512_cvttss2usi: 398 ; CHECK: ## BB#0: 399 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx 400 ; CHECK-NEXT: vcvttss2usi %xmm0, %eax 401 ; CHECK-NEXT: addl %ecx, %eax 402 ; CHECK-NEXT: retq 403 %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ; 404 %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ; 405 %res2 = add i32 %res0, %res1 406 ret i32 %res2 407 } 408 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone 409 410 define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) { 411 ; CHECK-LABEL: test_x86_avx512_cvttss2usi64: 412 ; CHECK: ## BB#0: 413 ; CHECK-NEXT: vcvttss2usi %xmm0, %rcx 414 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax 415 ; CHECK-NEXT: addq %rcx, %rax 416 ; CHECK-NEXT: retq 417 %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ; 418 %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ; 419 %res2 = add i64 %res0, %res1 420 ret i64 %res2 421 } 422 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone 423 424 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { 425 ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64: 426 ; CHECK: ## BB#0: 427 ; CHECK-NEXT: 
vcvtsd2usi %xmm0, %rax 428 ; CHECK-NEXT: retq 429 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1] 430 ret i64 %res 431 } 432 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone 433 434 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) { 435 ; CHECK-LABEL: test_x86_vcvtph2ps_512: 436 ; CHECK: ## BB#0: 437 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 438 ; CHECK-NEXT: retq 439 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) 440 ret <16 x float> %res 441 } 442 443 define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) { 444 ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae: 445 ; CHECK: ## BB#0: 446 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 447 ; CHECK-NEXT: retq 448 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) 449 ret <16 x float> %res 450 } 451 452 define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) { 453 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk: 454 ; CHECK: ## BB#0: 455 ; CHECK-NEXT: kmovw %edi, %k1 456 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1} 457 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 458 ; CHECK-NEXT: retq 459 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4) 460 ret <16 x float> %res 461 } 462 463 define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) { 464 ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz: 465 ; CHECK: ## BB#0: 466 ; CHECK-NEXT: kmovw %edi, %k1 467 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} 468 ; CHECK-NEXT: retq 469 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8) 470 ret <16 x float> %res 471 } 472 473 define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) { 474 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz: 
475 ; CHECK: ## BB#0: 476 ; CHECK-NEXT: kmovw %edi, %k1 477 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z} 478 ; CHECK-NEXT: retq 479 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4) 480 ret <16 x float> %res 481 } 482 483 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly 484 485 486 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) { 487 ; CHECK-LABEL: test_x86_vcvtps2ph_256: 488 ; CHECK: ## BB#0: 489 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm0 490 ; CHECK-NEXT: retq 491 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1) 492 ret <16 x i16> %res 493 } 494 495 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly 496 497 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) { 498 ; CHECK-LABEL: test_x86_vbroadcast_ss_512: 499 ; CHECK: ## BB#0: 500 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 501 ; CHECK-NEXT: retq 502 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1] 503 ret <16 x float> %res 504 } 505 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly 506 507 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) { 508 ; CHECK-LABEL: test_x86_vbroadcast_sd_512: 509 ; CHECK: ## BB#0: 510 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 511 ; CHECK-NEXT: retq 512 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1] 513 ret <8 x double> %res 514 } 515 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly 516 517 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) { 518 ; CHECK-LABEL: test_x86_vbroadcast_ss_ps_512: 519 ; CHECK: ## BB#0: 520 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 521 ; CHECK-NEXT: retq 522 %res = call <16 x float> 
@llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1] 523 ret <16 x float> %res 524 } 525 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly 526 527 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) { 528 ; CHECK-LABEL: test_x86_vbroadcast_sd_pd_512: 529 ; CHECK: ## BB#0: 530 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 531 ; CHECK-NEXT: retq 532 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1] 533 ret <8 x double> %res 534 } 535 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly 536 537 define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) { 538 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512: 539 ; CHECK: ## BB#0: 540 ; CHECK-NEXT: kmovw %edi, %k1 541 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1} 542 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2 {%k1} {z} 543 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0 544 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 545 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 546 ; CHECK-NEXT: retq 547 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1) 548 %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) 549 %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask) 550 %res3 = add <16 x i32> %res, %res1 551 %res4 = add <16 x i32> %res2, %res3 552 ret <16 x i32> %res4 553 } 554 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16) 555 556 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) { 557 ; CHECK-LABEL: test_x86_pbroadcastd_i32_512: 558 ; CHECK: ## BB#0: 559 ; CHECK-NEXT: vpbroadcastd %edi, %zmm0 560 ; CHECK-NEXT: retq 561 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1] 562 ret <16 x i32> %res 563 } 564 declare 
<16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly 565 566 define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) { 567 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512: 568 ; CHECK: ## BB#0: 569 ; CHECK-NEXT: movzbl %dil, %eax 570 ; CHECK-NEXT: kmovw %eax, %k1 571 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1} 572 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2 {%k1} {z} 573 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0 574 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 575 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 576 ; CHECK-NEXT: retq 577 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1) 578 %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask) 579 %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask) 580 %res3 = add <8 x i64> %res, %res1 581 %res4 = add <8 x i64> %res2, %res3 582 ret <8 x i64> %res4 583 } 584 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8) 585 586 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) { 587 ; CHECK-LABEL: test_x86_pbroadcastq_i64_512: 588 ; CHECK: ## BB#0: 589 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 590 ; CHECK-NEXT: retq 591 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1] 592 ret <8 x i64> %res 593 } 594 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly 595 596 define <16 x i32> @test_conflict_d(<16 x i32> %a) { 597 ; CHECK-LABEL: test_conflict_d: 598 ; CHECK: ## BB#0: 599 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0 600 ; CHECK-NEXT: retq 601 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) 602 ret <16 x i32> %res 603 } 604 605 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 606 607 define <8 x i64> @test_conflict_q(<8 x i64> %a) { 608 ; 
CHECK-LABEL: test_conflict_q: 609 ; CHECK: ## BB#0: 610 ; CHECK-NEXT: vpconflictq %zmm0, %zmm0 611 ; CHECK-NEXT: retq 612 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) 613 ret <8 x i64> %res 614 } 615 616 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 617 618 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { 619 ; CHECK-LABEL: test_maskz_conflict_d: 620 ; CHECK: ## BB#0: 621 ; CHECK-NEXT: kmovw %edi, %k1 622 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z} 623 ; CHECK-NEXT: retq 624 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) 625 ret <16 x i32> %res 626 } 627 628 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 629 ; CHECK-LABEL: test_mask_conflict_q: 630 ; CHECK: ## BB#0: 631 ; CHECK-NEXT: movzbl %dil, %eax 632 ; CHECK-NEXT: kmovw %eax, %k1 633 ; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1} 634 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 635 ; CHECK-NEXT: retq 636 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 637 ret <8 x i64> %res 638 } 639 640 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) { 641 ; CHECK-LABEL: test_lzcnt_d: 642 ; CHECK: ## BB#0: 643 ; CHECK-NEXT: vplzcntd %zmm0, %zmm0 644 ; CHECK-NEXT: retq 645 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) 646 ret <16 x i32> %res 647 } 648 649 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 650 651 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) { 652 ; CHECK-LABEL: test_lzcnt_q: 653 ; CHECK: ## BB#0: 654 ; CHECK-NEXT: vplzcntq %zmm0, %zmm0 655 ; CHECK-NEXT: retq 656 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) 657 ret <8 x i64> %res 658 } 659 660 declare <8 x i64> 
@llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 661 662 663 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 664 ; CHECK-LABEL: test_mask_lzcnt_d: 665 ; CHECK: ## BB#0: 666 ; CHECK-NEXT: kmovw %edi, %k1 667 ; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1} 668 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 669 ; CHECK-NEXT: retq 670 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 671 ret <16 x i32> %res 672 } 673 674 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 675 ; CHECK-LABEL: test_mask_lzcnt_q: 676 ; CHECK: ## BB#0: 677 ; CHECK-NEXT: movzbl %dil, %eax 678 ; CHECK-NEXT: kmovw %eax, %k1 679 ; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1} 680 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 681 ; CHECK-NEXT: retq 682 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 683 ret <8 x i64> %res 684 } 685 686 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) { 687 ; CHECK-LABEL: test_x86_mask_blend_ps_512: 688 ; CHECK: ## BB#0: 689 ; CHECK-NEXT: kmovw %edi, %k1 690 ; CHECK-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1} 691 ; CHECK-NEXT: retq 692 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1] 693 ret <16 x float> %res 694 } 695 696 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly 697 698 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) { 699 ; CHECK-LABEL: test_x86_mask_blend_pd_512: 700 ; CHECK: ## BB#0: 701 ; CHECK-NEXT: movzbl %dil, %eax 702 ; CHECK-NEXT: kmovw %eax, %k1 703 ; CHECK-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1} 704 ; CHECK-NEXT: retq 705 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1] 706 ret <8 x double> %res 
707 } 708 709 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) { 710 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop: 711 ; CHECK: ## BB#0: 712 ; CHECK-NEXT: movzbl %sil, %eax 713 ; CHECK-NEXT: kmovw %eax, %k1 714 ; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1} 715 ; CHECK-NEXT: retq 716 %b = load <8 x double>, <8 x double>* %ptr 717 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1] 718 ret <8 x double> %res 719 } 720 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly 721 722 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) { 723 ; CHECK-LABEL: test_x86_mask_blend_d_512: 724 ; CHECK: ## BB#0: 725 ; CHECK-NEXT: kmovw %edi, %k1 726 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm0 {%k1} 727 ; CHECK-NEXT: retq 728 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1] 729 ret <16 x i32> %res 730 } 731 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 732 733 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) { 734 ; CHECK-LABEL: test_x86_mask_blend_q_512: 735 ; CHECK: ## BB#0: 736 ; CHECK-NEXT: movzbl %dil, %eax 737 ; CHECK-NEXT: kmovw %eax, %k1 738 ; CHECK-NEXT: vpblendmq %zmm1, %zmm0, %zmm0 {%k1} 739 ; CHECK-NEXT: retq 740 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1] 741 ret <8 x i64> %res 742 } 743 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 744 745 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) { 746 ; CHECK-LABEL: test_cmpps: 747 ; CHECK: ## BB#0: 748 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 749 ; CHECK-NEXT: kmovw %k0, %eax 750 ; CHECK-NEXT: retq 751 %res = 
call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8) 752 ret i16 %res 753 } 754 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32) 755 756 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) { 757 ; CHECK-LABEL: test_cmppd: 758 ; CHECK: ## BB#0: 759 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0 760 ; CHECK-NEXT: kmovw %k0, %eax 761 ; CHECK-NEXT: retq 762 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4) 763 ret i8 %res 764 } 765 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) 766 767 ; fp min - max 768 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) { 769 ; CHECK-LABEL: test_vmaxpd: 770 ; CHECK: ## BB#0: 771 ; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 772 ; CHECK-NEXT: retq 773 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, 774 <8 x double>zeroinitializer, i8 -1, i32 4) 775 ret <8 x double> %res 776 } 777 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>, 778 <8 x double>, i8, i32) 779 780 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) { 781 ; CHECK-LABEL: test_vminpd: 782 ; CHECK: ## BB#0: 783 ; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0 784 ; CHECK-NEXT: retq 785 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, 786 <8 x double>zeroinitializer, i8 -1, i32 4) 787 ret <8 x double> %res 788 } 789 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, 790 <8 x double>, i8, i32) 791 792 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) 793 794 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { 795 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512: 796 ; CHECK: ## BB#0: 797 ; CHECK-NEXT: kmovw %edi, %k1 798 ; CHECK-NEXT: vpabsd %zmm0, 
%zmm1 {%k1} 799 ; CHECK-NEXT: vpabsd %zmm0, %zmm0 800 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 801 ; CHECK-NEXT: retq 802 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) 803 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1) 804 %res2 = add <16 x i32> %res, %res1 805 ret <16 x i32> %res2 806 } 807 808 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) 809 810 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { 811 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512: 812 ; CHECK: ## BB#0: 813 ; CHECK-NEXT: movzbl %dil, %eax 814 ; CHECK-NEXT: kmovw %eax, %k1 815 ; CHECK-NEXT: vpabsq %zmm0, %zmm1 {%k1} 816 ; CHECK-NEXT: vpabsq %zmm0, %zmm0 817 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 818 ; CHECK-NEXT: retq 819 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) 820 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1) 821 %res2 = add <8 x i64> %res, %res1 822 ret <8 x i64> %res2 823 } 824 825 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) { 826 ; CHECK-LABEL: test_vptestmq: 827 ; CHECK: ## BB#0: 828 ; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 829 ; CHECK-NEXT: kmovw %k0, %eax 830 ; CHECK-NEXT: retq 831 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1) 832 ret i8 %res 833 } 834 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8) 835 836 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) { 837 ; CHECK-LABEL: test_vptestmd: 838 ; CHECK: ## BB#0: 839 ; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 840 ; CHECK-NEXT: kmovw %k0, %eax 841 ; CHECK-NEXT: retq 842 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1) 843 ret i16 %res 844 } 845 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16) 846 847 define void 
@test_store1(<16 x float> %data, i8* %ptr, i16 %mask) { 848 ; CHECK-LABEL: test_store1: 849 ; CHECK: ## BB#0: 850 ; CHECK-NEXT: kmovw %esi, %k1 851 ; CHECK-NEXT: vmovups %zmm0, (%rdi) {%k1} 852 ; CHECK-NEXT: retq 853 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) 854 ret void 855 } 856 857 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 ) 858 859 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) { 860 ; CHECK-LABEL: test_store2: 861 ; CHECK: ## BB#0: 862 ; CHECK-NEXT: kmovw %esi, %k1 863 ; CHECK-NEXT: vmovupd %zmm0, (%rdi) {%k1} 864 ; CHECK-NEXT: retq 865 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) 866 ret void 867 } 868 869 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8) 870 871 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { 872 ; CHECK-LABEL: test_mask_store_aligned_ps: 873 ; CHECK: ## BB#0: 874 ; CHECK-NEXT: kmovw %esi, %k1 875 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1} 876 ; CHECK-NEXT: retq 877 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) 878 ret void 879 } 880 881 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 ) 882 883 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) { 884 ; CHECK-LABEL: test_mask_store_aligned_pd: 885 ; CHECK: ## BB#0: 886 ; CHECK-NEXT: kmovw %esi, %k1 887 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1} 888 ; CHECK-NEXT: retq 889 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) 890 ret void 891 } 892 893 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8) 894 895 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { 896 ; CHECK-LABEL: test_maskz_load_aligned_ps: 897 ; CHECK: ## BB#0: 898 ; CHECK-NEXT: kmovw %esi, %k1 899 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} 900 ; CHECK-NEXT: 
retq 901 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask) 902 ret <16 x float> %res 903 } 904 905 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16) 906 907 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) { 908 ; CHECK-LABEL: test_maskz_load_aligned_pd: 909 ; CHECK: ## BB#0: 910 ; CHECK-NEXT: kmovw %esi, %k1 911 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} 912 ; CHECK-NEXT: retq 913 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask) 914 ret <8 x double> %res 915 } 916 917 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8) 918 919 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { 920 ; CHECK-LABEL: test_load_aligned_ps: 921 ; CHECK: ## BB#0: 922 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 923 ; CHECK-NEXT: retq 924 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1) 925 ret <16 x float> %res 926 } 927 928 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) { 929 ; CHECK-LABEL: test_load_aligned_pd: 930 ; CHECK: ## BB#0: 931 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 932 ; CHECK-NEXT: retq 933 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1) 934 ret <8 x double> %res 935 } 936 937 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) 938 939 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) { 940 ; CHECK-LABEL: test_valign_q: 941 ; CHECK: ## BB#0: 942 ; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0 943 ; CHECK-NEXT: retq 944 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1) 945 ret <8 x i64> %res 946 } 947 948 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) { 949 ; CHECK-LABEL: 
test_mask_valign_q: 950 ; CHECK: ## BB#0: 951 ; CHECK-NEXT: movzbl %dil, %eax 952 ; CHECK-NEXT: kmovw %eax, %k1 953 ; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} 954 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 955 ; CHECK-NEXT: retq 956 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask) 957 ret <8 x i64> %res 958 } 959 960 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8) 961 962 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 963 ; CHECK-LABEL: test_maskz_valign_d: 964 ; CHECK: ## BB#0: 965 ; CHECK-NEXT: kmovw %edi, %k1 966 ; CHECK-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} 967 ; CHECK-NEXT: retq 968 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask) 969 ret <16 x i32> %res 970 } 971 972 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16) 973 974 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) { 975 ; CHECK-LABEL: test_mask_store_ss: 976 ; CHECK: ## BB#0: 977 ; CHECK-NEXT: kmovw %esi, %k1 978 ; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1} 979 ; CHECK-NEXT: retq 980 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask) 981 ret void 982 } 983 984 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 ) 985 986 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) { 987 ; CHECK-LABEL: test_pcmpeq_d: 988 ; CHECK: ## BB#0: 989 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 990 ; CHECK-NEXT: kmovw %k0, %eax 991 ; CHECK-NEXT: retq 992 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) 993 ret i16 %res 994 } 995 996 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 997 ; CHECK-LABEL: test_mask_pcmpeq_d: 998 ; CHECK: ## BB#0: 999 ; CHECK-NEXT: kmovw %edi, %k1 1000 ; CHECK-NEXT: vpcmpeqd 
%zmm1, %zmm0, %k0 {%k1} 1001 ; CHECK-NEXT: kmovw %k0, %eax 1002 ; CHECK-NEXT: retq 1003 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 1004 ret i16 %res 1005 } 1006 1007 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16) 1008 1009 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) { 1010 ; CHECK-LABEL: test_pcmpeq_q: 1011 ; CHECK: ## BB#0: 1012 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 1013 ; CHECK-NEXT: kmovw %k0, %eax 1014 ; CHECK-NEXT: retq 1015 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 1016 ret i8 %res 1017 } 1018 1019 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 1020 ; CHECK-LABEL: test_mask_pcmpeq_q: 1021 ; CHECK: ## BB#0: 1022 ; CHECK-NEXT: movzbl %dil, %eax 1023 ; CHECK-NEXT: kmovw %eax, %k1 1024 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 1025 ; CHECK-NEXT: kmovw %k0, %eax 1026 ; CHECK-NEXT: retq 1027 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 1028 ret i8 %res 1029 } 1030 1031 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8) 1032 1033 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) { 1034 ; CHECK-LABEL: test_pcmpgt_d: 1035 ; CHECK: ## BB#0: 1036 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 1037 ; CHECK-NEXT: kmovw %k0, %eax 1038 ; CHECK-NEXT: retq 1039 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) 1040 ret i16 %res 1041 } 1042 1043 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 1044 ; CHECK-LABEL: test_mask_pcmpgt_d: 1045 ; CHECK: ## BB#0: 1046 ; CHECK-NEXT: kmovw %edi, %k1 1047 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 1048 ; CHECK-NEXT: kmovw %k0, %eax 1049 ; CHECK-NEXT: retq 1050 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 1051 ret i16 %res 1052 } 1053 1054 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x 
i32>, i16) 1055 1056 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) { 1057 ; CHECK-LABEL: test_pcmpgt_q: 1058 ; CHECK: ## BB#0: 1059 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 1060 ; CHECK-NEXT: kmovw %k0, %eax 1061 ; CHECK-NEXT: retq 1062 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 1063 ret i8 %res 1064 } 1065 1066 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 1067 ; CHECK-LABEL: test_mask_pcmpgt_q: 1068 ; CHECK: ## BB#0: 1069 ; CHECK-NEXT: movzbl %dil, %eax 1070 ; CHECK-NEXT: kmovw %eax, %k1 1071 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 1072 ; CHECK-NEXT: kmovw %k0, %eax 1073 ; CHECK-NEXT: retq 1074 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 1075 ret i8 %res 1076 } 1077 1078 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8) 1079 1080 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 1081 ; CHECK-LABEL: test_cmp_d_512: 1082 ; CHECK: ## BB#0: 1083 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1084 ; CHECK-NEXT: kmovw %k0, %r8d 1085 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0 1086 ; CHECK-NEXT: kmovw %k0, %r9d 1087 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0 1088 ; CHECK-NEXT: kmovw %k0, %r10d 1089 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0 1090 ; CHECK-NEXT: kmovw %k0, %esi 1091 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 1092 ; CHECK-NEXT: kmovw %k0, %edi 1093 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 1094 ; CHECK-NEXT: kmovw %k0, %eax 1095 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0 1096 ; CHECK-NEXT: kmovw %k0, %ecx 1097 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0 1098 ; CHECK-NEXT: kmovw %k0, %edx 1099 ; CHECK-NEXT: vmovd %r8d, %xmm0 1100 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0 1101 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0 1102 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0 1103 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0 1104 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1105 ; CHECK-NEXT: vpinsrw $6, %ecx, 
%xmm0, %xmm0 1106 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 1107 ; CHECK-NEXT: retq 1108 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) 1109 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1110 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) 1111 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1112 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) 1113 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1114 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) 1115 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 1116 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) 1117 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1118 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) 1119 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1120 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) 1121 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1122 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) 1123 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 1124 ret <8 x i16> %vec7 1125 } 1126 1127 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1128 ; CHECK-LABEL: test_mask_cmp_d_512: 1129 ; CHECK: ## BB#0: 1130 ; CHECK-NEXT: kmovw %edi, %k1 1131 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1132 ; CHECK-NEXT: kmovw %k0, %r8d 1133 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0 {%k1} 1134 ; CHECK-NEXT: kmovw %k0, %r9d 1135 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0 {%k1} 1136 ; CHECK-NEXT: kmovw %k0, %r10d 1137 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} 1138 ; CHECK-NEXT: kmovw %k0, %esi 1139 ; CHECK-NEXT: vpcmpneqd %zmm1, 
%zmm0, %k0 {%k1} 1140 ; CHECK-NEXT: kmovw %k0, %edi 1141 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 1142 ; CHECK-NEXT: kmovw %k0, %eax 1143 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0 {%k1} 1144 ; CHECK-NEXT: kmovw %k0, %ecx 1145 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0 {%k1} 1146 ; CHECK-NEXT: kmovw %k0, %edx 1147 ; CHECK-NEXT: vmovd %r8d, %xmm0 1148 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0 1149 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0 1150 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0 1151 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0 1152 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1153 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1154 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 1155 ; CHECK-NEXT: retq 1156 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) 1157 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1158 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) 1159 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1160 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) 1161 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1162 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) 1163 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 1164 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) 1165 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1166 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) 1167 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1168 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) 1169 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1170 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) 1171 %vec7 = 
insertelement <8 x i16> %vec6, i16 %res7, i32 7 1172 ret <8 x i16> %vec7 1173 } 1174 1175 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone 1176 1177 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 1178 ; CHECK-LABEL: test_ucmp_d_512: 1179 ; CHECK: ## BB#0: 1180 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 1181 ; CHECK-NEXT: kmovw %k0, %r8d 1182 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 1183 ; CHECK-NEXT: kmovw %k0, %r9d 1184 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0 1185 ; CHECK-NEXT: kmovw %k0, %r10d 1186 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0 1187 ; CHECK-NEXT: kmovw %k0, %esi 1188 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 1189 ; CHECK-NEXT: kmovw %k0, %edi 1190 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0 1191 ; CHECK-NEXT: kmovw %k0, %eax 1192 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 1193 ; CHECK-NEXT: kmovw %k0, %ecx 1194 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0 1195 ; CHECK-NEXT: kmovw %k0, %edx 1196 ; CHECK-NEXT: vmovd %r8d, %xmm0 1197 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0 1198 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0 1199 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0 1200 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0 1201 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1202 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1203 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 1204 ; CHECK-NEXT: retq 1205 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) 1206 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1207 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) 1208 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1209 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) 1210 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1211 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) 1212 
%vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 1213 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) 1214 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1215 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) 1216 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1217 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) 1218 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1219 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) 1220 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 1221 ret <8 x i16> %vec7 1222 } 1223 1224 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1225 ; CHECK-LABEL: test_mask_ucmp_d_512: 1226 ; CHECK: ## BB#0: 1227 ; CHECK-NEXT: kmovw %edi, %k1 1228 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 {%k1} 1229 ; CHECK-NEXT: kmovw %k0, %r8d 1230 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 1231 ; CHECK-NEXT: kmovw %k0, %r9d 1232 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0 {%k1} 1233 ; CHECK-NEXT: kmovw %k0, %r10d 1234 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} 1235 ; CHECK-NEXT: kmovw %k0, %esi 1236 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} 1237 ; CHECK-NEXT: kmovw %k0, %edi 1238 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} 1239 ; CHECK-NEXT: kmovw %k0, %eax 1240 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} 1241 ; CHECK-NEXT: kmovw %k0, %ecx 1242 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0 {%k1} 1243 ; CHECK-NEXT: kmovw %k0, %edx 1244 ; CHECK-NEXT: vmovd %r8d, %xmm0 1245 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0 1246 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0 1247 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0 1248 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0 1249 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1250 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1251 ; 
CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 1252 ; CHECK-NEXT: retq 1253 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) 1254 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1255 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) 1256 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1257 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) 1258 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1259 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) 1260 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 1261 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) 1262 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1263 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) 1264 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1265 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) 1266 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1267 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) 1268 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 1269 ret <8 x i16> %vec7 1270 } 1271 1272 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone 1273 1274 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { 1275 ; CHECK-LABEL: test_cmp_q_512: 1276 ; CHECK: ## BB#0: 1277 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 1278 ; CHECK-NEXT: kmovw %k0, %r8d 1279 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0 1280 ; CHECK-NEXT: kmovw %k0, %r9d 1281 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0 1282 ; CHECK-NEXT: kmovw %k0, %r10d 1283 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0 1284 ; CHECK-NEXT: kmovw %k0, %r11d 
1285 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 1286 ; CHECK-NEXT: kmovw %k0, %edi 1287 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 1288 ; CHECK-NEXT: kmovw %k0, %eax 1289 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0 1290 ; CHECK-NEXT: kmovw %k0, %ecx 1291 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 1292 ; CHECK-NEXT: kmovw %k0, %edx 1293 ; CHECK-NEXT: movzbl %r8b, %esi 1294 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 1295 ; CHECK-NEXT: movzbl %r9b, %esi 1296 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 1297 ; CHECK-NEXT: movzbl %r10b, %esi 1298 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 1299 ; CHECK-NEXT: movzbl %r11b, %esi 1300 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0 1301 ; CHECK-NEXT: movzbl %dil, %esi 1302 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 1303 ; CHECK-NEXT: movzbl %al, %eax 1304 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1305 ; CHECK-NEXT: movzbl %cl, %eax 1306 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1307 ; CHECK-NEXT: movzbl %dl, %eax 1308 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1309 ; CHECK-NEXT: retq 1310 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) 1311 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1312 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) 1313 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1314 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) 1315 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1316 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) 1317 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1318 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1) 1319 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1320 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) 1321 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1322 %res6 = 
call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) 1323 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1324 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) 1325 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1326 ret <8 x i8> %vec7 1327 } 1328 1329 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1330 ; CHECK-LABEL: test_mask_cmp_q_512: 1331 ; CHECK: ## BB#0: 1332 ; CHECK-NEXT: movzbl %dil, %eax 1333 ; CHECK-NEXT: kmovw %eax, %k1 1334 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 1335 ; CHECK-NEXT: kmovw %k0, %r8d 1336 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0 {%k1} 1337 ; CHECK-NEXT: kmovw %k0, %r9d 1338 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1} 1339 ; CHECK-NEXT: kmovw %k0, %r10d 1340 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} 1341 ; CHECK-NEXT: kmovw %k0, %r11d 1342 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} 1343 ; CHECK-NEXT: kmovw %k0, %edi 1344 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 1345 ; CHECK-NEXT: kmovw %k0, %eax 1346 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} 1347 ; CHECK-NEXT: kmovw %k0, %ecx 1348 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 {%k1} 1349 ; CHECK-NEXT: kmovw %k0, %edx 1350 ; CHECK-NEXT: movzbl %r8b, %esi 1351 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 1352 ; CHECK-NEXT: movzbl %r9b, %esi 1353 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 1354 ; CHECK-NEXT: movzbl %r10b, %esi 1355 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 1356 ; CHECK-NEXT: movzbl %r11b, %esi 1357 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0 1358 ; CHECK-NEXT: movzbl %dil, %esi 1359 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 1360 ; CHECK-NEXT: movzbl %al, %eax 1361 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1362 ; CHECK-NEXT: movzbl %cl, %eax 1363 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1364 ; CHECK-NEXT: movzbl %dl, %eax 1365 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1366 ; CHECK-NEXT: retq 1367 %res0 = call 
i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) 1368 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1369 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask) 1370 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1371 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask) 1372 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1373 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask) 1374 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1375 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask) 1376 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1377 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask) 1378 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1379 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask) 1380 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1381 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask) 1382 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1383 ret <8 x i8> %vec7 1384 } 1385 1386 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone 1387 1388 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { 1389 ; CHECK-LABEL: test_ucmp_q_512: 1390 ; CHECK: ## BB#0: 1391 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 1392 ; CHECK-NEXT: kmovw %k0, %r8d 1393 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 1394 ; CHECK-NEXT: kmovw %k0, %r9d 1395 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 1396 ; CHECK-NEXT: kmovw %k0, %r10d 1397 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0 1398 ; CHECK-NEXT: kmovw %k0, %r11d 1399 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 1400 ; CHECK-NEXT: kmovw %k0, %edi 1401 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 1402 ; 
CHECK-NEXT: kmovw %k0, %eax 1403 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 1404 ; CHECK-NEXT: kmovw %k0, %ecx 1405 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 1406 ; CHECK-NEXT: kmovw %k0, %edx 1407 ; CHECK-NEXT: movzbl %r8b, %esi 1408 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 1409 ; CHECK-NEXT: movzbl %r9b, %esi 1410 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 1411 ; CHECK-NEXT: movzbl %r10b, %esi 1412 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 1413 ; CHECK-NEXT: movzbl %r11b, %esi 1414 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0 1415 ; CHECK-NEXT: movzbl %dil, %esi 1416 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 1417 ; CHECK-NEXT: movzbl %al, %eax 1418 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1419 ; CHECK-NEXT: movzbl %cl, %eax 1420 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1421 ; CHECK-NEXT: movzbl %dl, %eax 1422 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1423 ; CHECK-NEXT: retq 1424 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) 1425 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1426 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) 1427 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1428 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) 1429 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1430 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) 1431 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1432 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1) 1433 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1434 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) 1435 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1436 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) 1437 %vec6 = insertelement <8 x i8> %vec5, 
i8 %res6, i32 6 1438 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) 1439 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1440 ret <8 x i8> %vec7 1441 } 1442 1443 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1444 ; CHECK-LABEL: test_mask_ucmp_q_512: 1445 ; CHECK: ## BB#0: 1446 ; CHECK-NEXT: movzbl %dil, %eax 1447 ; CHECK-NEXT: kmovw %eax, %k1 1448 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 {%k1} 1449 ; CHECK-NEXT: kmovw %k0, %r8d 1450 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 1451 ; CHECK-NEXT: kmovw %k0, %r9d 1452 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} 1453 ; CHECK-NEXT: kmovw %k0, %r10d 1454 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} 1455 ; CHECK-NEXT: kmovw %k0, %r11d 1456 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} 1457 ; CHECK-NEXT: kmovw %k0, %edi 1458 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} 1459 ; CHECK-NEXT: kmovw %k0, %eax 1460 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} 1461 ; CHECK-NEXT: kmovw %k0, %ecx 1462 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 {%k1} 1463 ; CHECK-NEXT: kmovw %k0, %edx 1464 ; CHECK-NEXT: movzbl %r8b, %esi 1465 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 1466 ; CHECK-NEXT: movzbl %r9b, %esi 1467 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 1468 ; CHECK-NEXT: movzbl %r10b, %esi 1469 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 1470 ; CHECK-NEXT: movzbl %r11b, %esi 1471 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0 1472 ; CHECK-NEXT: movzbl %dil, %esi 1473 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 1474 ; CHECK-NEXT: movzbl %al, %eax 1475 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1476 ; CHECK-NEXT: movzbl %cl, %eax 1477 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1478 ; CHECK-NEXT: movzbl %dl, %eax 1479 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1480 ; CHECK-NEXT: retq 1481 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) 1482 %vec0 = insertelement <8 x 
i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone

; Masked/zero-masked 128-bit and 256-bit extracts from 512-bit vectors.
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)

define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextracti64x4:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)

define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)

define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK: ## BB#0:
; CHECK-NEXT: vextractf64x4 $2, %zmm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)

; Shift-left-by-immediate intrinsics: unmasked, merge-masked, and zero-masked forms.
define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpslld $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_pslli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_pslli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_pslli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_pslli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

; Logical/arithmetic right-shift-by-immediate and shift-by-xmm-count intrinsics.
define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrai_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrai_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrai_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrai_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psll_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psll_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psll_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psll_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrl_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrl_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrl_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrl_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psra_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psra_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psra_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psra_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psllv_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psllv_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psllv_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psllv_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone


define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrav_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrav_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrav_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrav_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrlv_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrlv_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
; CHECK-LABEL: test_x86_avx512_psrlv_q_memop:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = load <8 x i64>, <8 x i64>* %ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rn:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rd:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_ru:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rz:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rn:
; CHECK: ## BB#0:
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rd:
; CHECK: ## BB#0:
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_ru:
; CHECK: ## BB#0:
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rz:
; CHECK: ## BB#0:
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

;; mask float
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rn:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_ru:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}

;; With Passthru value
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rn:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_ru:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 3)
  ret <16 x float> %res
}

;; mask double
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rn:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 0)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rd:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 1)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_ru:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 2)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rz:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 3)
  ret <8 x double> %res
}

define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_xor_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_xor_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_or_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_or_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_and_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_and_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1}
;
CHECK-NEXT: vmovaps %zmm2, %zmm0 2440 ; CHECK-NEXT: retq 2441 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2442 ret < 16 x i32> %res 2443 } 2444 2445 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 2446 2447 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) { 2448 ; CHECK-LABEL: test_xor_epi64: 2449 ; CHECK: ## BB#0: 2450 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 2451 ; CHECK-NEXT: retq 2452 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 2453 ret < 8 x i64> %res 2454 } 2455 2456 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2457 ; CHECK-LABEL: test_mask_xor_epi64: 2458 ; CHECK: ## BB#0: 2459 ; CHECK-NEXT: movzbl %dil, %eax 2460 ; CHECK-NEXT: kmovw %eax, %k1 2461 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1} 2462 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 2463 ; CHECK-NEXT: retq 2464 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2465 ret < 8 x i64> %res 2466 } 2467 2468 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2469 2470 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) { 2471 ; CHECK-LABEL: test_or_epi64: 2472 ; CHECK: ## BB#0: 2473 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 2474 ; CHECK-NEXT: retq 2475 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 2476 ret < 8 x i64> %res 2477 } 2478 2479 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2480 ; CHECK-LABEL: test_mask_or_epi64: 2481 ; CHECK: ## BB#0: 2482 ; CHECK-NEXT: movzbl %dil, %eax 2483 ; CHECK-NEXT: kmovw %eax, %k1 2484 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1} 2485 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 2486 ; CHECK-NEXT: retq 2487 %res = 
call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2488 ret < 8 x i64> %res 2489 } 2490 2491 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2492 2493 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) { 2494 ; CHECK-LABEL: test_and_epi64: 2495 ; CHECK: ## BB#0: 2496 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 2497 ; CHECK-NEXT: retq 2498 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 2499 ret < 8 x i64> %res 2500 } 2501 2502 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2503 ; CHECK-LABEL: test_mask_and_epi64: 2504 ; CHECK: ## BB#0: 2505 ; CHECK-NEXT: movzbl %dil, %eax 2506 ; CHECK-NEXT: kmovw %eax, %k1 2507 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1} 2508 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 2509 ; CHECK-NEXT: retq 2510 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2511 ret < 8 x i64> %res 2512 } 2513 2514 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2515 2516 2517 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 2518 ; CHECK-LABEL: test_mask_add_epi32_rr: 2519 ; CHECK: ## BB#0: 2520 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 2521 ; CHECK-NEXT: retq 2522 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2523 ret < 16 x i32> %res 2524 } 2525 2526 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 2527 ; CHECK-LABEL: test_mask_add_epi32_rrk: 2528 ; CHECK: ## BB#0: 2529 ; CHECK-NEXT: kmovw %edi, %k1 2530 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1} 2531 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 2532 ; CHECK-NEXT: retq 2533 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, 
<16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2534 ret < 16 x i32> %res 2535 } 2536 2537 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 2538 ; CHECK-LABEL: test_mask_add_epi32_rrkz: 2539 ; CHECK: ## BB#0: 2540 ; CHECK-NEXT: kmovw %edi, %k1 2541 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} 2542 ; CHECK-NEXT: retq 2543 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2544 ret < 16 x i32> %res 2545 } 2546 2547 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 2548 ; CHECK-LABEL: test_mask_add_epi32_rm: 2549 ; CHECK: ## BB#0: 2550 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 2551 ; CHECK-NEXT: retq 2552 %b = load <16 x i32>, <16 x i32>* %ptr_b 2553 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2554 ret < 16 x i32> %res 2555 } 2556 2557 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2558 ; CHECK-LABEL: test_mask_add_epi32_rmk: 2559 ; CHECK: ## BB#0: 2560 ; CHECK-NEXT: kmovw %esi, %k1 2561 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1} 2562 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 2563 ; CHECK-NEXT: retq 2564 %b = load <16 x i32>, <16 x i32>* %ptr_b 2565 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2566 ret < 16 x i32> %res 2567 } 2568 2569 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 2570 ; CHECK-LABEL: test_mask_add_epi32_rmkz: 2571 ; CHECK: ## BB#0: 2572 ; CHECK-NEXT: kmovw %esi, %k1 2573 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} 2574 ; CHECK-NEXT: retq 2575 %b = load <16 x i32>, <16 x i32>* %ptr_b 2576 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2577 ret < 16 x i32> %res 2578 } 

;; Embedded-broadcast ({1to16}) forms of the masked 32-bit add.
define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_add_epi32_rmb:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd (%rdi){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_add_epi32_rmbk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_add_epi32_rmbkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

;; Masked 32-bit integer subtract (vpsubd), same rr/rm/rmb matrix as add.
define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_sub_epi32_rr:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi32_rrk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi32_rrkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_sub_epi32_rm:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi32_rmk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpsubd (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi32_rmkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_sub_epi32_rmb:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd (%rdi){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi32_rmbk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi32_rmbkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

;; Masked 64-bit integer add (vpaddq); i8 masks go through movzbl before kmovw.
define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_mask_add_epi64_rr:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi64_rrk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi64_rrkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_add_epi64_rm:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi64_rmk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi64_rmkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_add_epi64_rmb:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi64_rmbk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi64_rmbkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

;; Masked 64-bit integer subtract (vpsubq), same matrix as the 64-bit add.
define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_mask_sub_epi64_rr:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi64_rrk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi64_rrkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_sub_epi64_rm:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi64_rmk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpsubq (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi64_rmkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_sub_epi64_rmb:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi64_rmbk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi64_rmbkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

;; Masked signed 32x32->64 multiply (vpmuldq): <16 x i32> inputs, <8 x i64> result.
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rrk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rrkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epi32_rm:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuldq (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epi32_rmb:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmbk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmbkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)

;; Masked unsigned 32x32->64 multiply (vpmuludq), same matrix as the signed form.
define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epu32_rr:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rrk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rrkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epu32_rm:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuludq (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epu32_rmb:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmbk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call
<8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 3151 ret < 8 x i64> %res 3152 } 3153 3154 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) { 3155 ; CHECK-LABEL: test_mask_mul_epu32_rmbkz: 3156 ; CHECK: ## BB#0: 3157 ; CHECK-NEXT: movzbl %sil, %eax 3158 ; CHECK-NEXT: kmovw %eax, %k1 3159 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} 3160 ; CHECK-NEXT: retq 3161 %q = load i64, i64* %ptr_b 3162 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 3163 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 3164 %b = bitcast <8 x i64> %b64 to <16 x i32> 3165 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 3166 ret < 8 x i64> %res 3167 } 3168 3169 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8) 3170 3171 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 3172 ; CHECK-LABEL: test_mask_mullo_epi32_rr_512: 3173 ; CHECK: ## BB#0: 3174 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 3175 ; CHECK-NEXT: retq 3176 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 3177 ret <16 x i32> %res 3178 } 3179 3180 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 3181 ; CHECK-LABEL: test_mask_mullo_epi32_rrk_512: 3182 ; CHECK: ## BB#0: 3183 ; CHECK-NEXT: kmovw %edi, %k1 3184 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1} 3185 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3186 ; CHECK-NEXT: retq 3187 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 3188 ret < 16 x i32> %res 3189 } 3190 3191 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 3192 ; CHECK-LABEL: 
test_mask_mullo_epi32_rrkz_512: 3193 ; CHECK: ## BB#0: 3194 ; CHECK-NEXT: kmovw %edi, %k1 3195 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} 3196 ; CHECK-NEXT: retq 3197 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 3198 ret < 16 x i32> %res 3199 } 3200 3201 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 3202 ; CHECK-LABEL: test_mask_mullo_epi32_rm_512: 3203 ; CHECK: ## BB#0: 3204 ; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 3205 ; CHECK-NEXT: retq 3206 %b = load <16 x i32>, <16 x i32>* %ptr_b 3207 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 3208 ret < 16 x i32> %res 3209 } 3210 3211 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 3212 ; CHECK-LABEL: test_mask_mullo_epi32_rmk_512: 3213 ; CHECK: ## BB#0: 3214 ; CHECK-NEXT: kmovw %esi, %k1 3215 ; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1} 3216 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 3217 ; CHECK-NEXT: retq 3218 %b = load <16 x i32>, <16 x i32>* %ptr_b 3219 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 3220 ret < 16 x i32> %res 3221 } 3222 3223 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 3224 ; CHECK-LABEL: test_mask_mullo_epi32_rmkz_512: 3225 ; CHECK: ## BB#0: 3226 ; CHECK-NEXT: kmovw %esi, %k1 3227 ; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} 3228 ; CHECK-NEXT: retq 3229 %b = load <16 x i32>, <16 x i32>* %ptr_b 3230 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 3231 ret < 16 x i32> %res 3232 } 3233 3234 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 3235 ; CHECK-LABEL: test_mask_mullo_epi32_rmb_512: 3236 ; 
CHECK: ## BB#0: 3237 ; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 3238 ; CHECK-NEXT: retq 3239 %q = load i32, i32* %ptr_b 3240 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 3241 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 3242 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 3243 ret < 16 x i32> %res 3244 } 3245 3246 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { 3247 ; CHECK-LABEL: test_mask_mullo_epi32_rmbk_512: 3248 ; CHECK: ## BB#0: 3249 ; CHECK-NEXT: kmovw %esi, %k1 3250 ; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} 3251 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 3252 ; CHECK-NEXT: retq 3253 %q = load i32, i32* %ptr_b 3254 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 3255 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 3256 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 3257 ret < 16 x i32> %res 3258 } 3259 3260 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) { 3261 ; CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512: 3262 ; CHECK: ## BB#0: 3263 ; CHECK-NEXT: kmovw %esi, %k1 3264 ; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 3265 ; CHECK-NEXT: retq 3266 %q = load i32, i32* %ptr_b 3267 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 3268 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 3269 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 3270 ret < 16 x i32> %res 3271 } 3272 3273 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 3274 3275 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x 
float> %a1, i16 %mask) { 3276 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae: 3277 ; CHECK: ## BB#0: 3278 ; CHECK-NEXT: kmovw %edi, %k1 3279 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3280 ; CHECK-NEXT: retq 3281 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0) 3282 ret <16 x float> %res 3283 } 3284 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3285 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae: 3286 ; CHECK: ## BB#0: 3287 ; CHECK-NEXT: kmovw %edi, %k1 3288 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3289 ; CHECK-NEXT: retq 3290 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1) 3291 ret <16 x float> %res 3292 } 3293 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3294 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae: 3295 ; CHECK: ## BB#0: 3296 ; CHECK-NEXT: kmovw %edi, %k1 3297 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3298 ; CHECK-NEXT: retq 3299 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2) 3300 ret <16 x float> %res 3301 } 3302 3303 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3304 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae: 3305 ; CHECK: ## BB#0: 3306 ; CHECK-NEXT: kmovw %edi, %k1 3307 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3308 ; CHECK-NEXT: retq 3309 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3) 3310 ret <16 x float> %res 3311 } 3312 3313 3314 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> 
%a0, <16 x float> %a1, i16 %mask) { 3315 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_current: 3316 ; CHECK: ## BB#0: 3317 ; CHECK-NEXT: kmovw %edi, %k1 3318 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} 3319 ; CHECK-NEXT: retq 3320 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) 3321 ret <16 x float> %res 3322 } 3323 3324 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3325 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae: 3326 ; CHECK: ## BB#0: 3327 ; CHECK-NEXT: kmovw %edi, %k1 3328 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3329 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3330 ; CHECK-NEXT: retq 3331 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0) 3332 ret <16 x float> %res 3333 } 3334 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3335 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae: 3336 ; CHECK: ## BB#0: 3337 ; CHECK-NEXT: kmovw %edi, %k1 3338 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3339 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3340 ; CHECK-NEXT: retq 3341 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1) 3342 ret <16 x float> %res 3343 } 3344 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3345 ; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae: 3346 ; CHECK: ## BB#0: 3347 ; CHECK-NEXT: kmovw %edi, %k1 3348 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3349 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3350 ; CHECK-NEXT: retq 3351 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, 
i32 2) 3352 ret <16 x float> %res 3353 } 3354 3355 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3356 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae: 3357 ; CHECK: ## BB#0: 3358 ; CHECK-NEXT: kmovw %edi, %k1 3359 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3360 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3361 ; CHECK-NEXT: retq 3362 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3) 3363 ret <16 x float> %res 3364 } 3365 3366 3367 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3368 ; CHECK-LABEL: test_mm512_mask_add_round_ps_current: 3369 ; CHECK: ## BB#0: 3370 ; CHECK-NEXT: kmovw %edi, %k1 3371 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} 3372 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3373 ; CHECK-NEXT: retq 3374 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) 3375 ret <16 x float> %res 3376 } 3377 3378 3379 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3380 ; CHECK-LABEL: test_mm512_add_round_ps_rn_sae: 3381 ; CHECK: ## BB#0: 3382 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 3383 ; CHECK-NEXT: retq 3384 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0) 3385 ret <16 x float> %res 3386 } 3387 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3388 ; CHECK-LABEL: test_mm512_add_round_ps_rd_sae: 3389 ; CHECK: ## BB#0: 3390 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 3391 ; CHECK-NEXT: retq 3392 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1) 3393 ret <16 x 
float> %res 3394 } 3395 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3396 ; CHECK-LABEL: test_mm512_add_round_ps_ru_sae: 3397 ; CHECK: ## BB#0: 3398 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 3399 ; CHECK-NEXT: retq 3400 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2) 3401 ret <16 x float> %res 3402 } 3403 3404 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3405 ; CHECK-LABEL: test_mm512_add_round_ps_rz_sae: 3406 ; CHECK: ## BB#0: 3407 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 3408 ; CHECK-NEXT: retq 3409 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3) 3410 ret <16 x float> %res 3411 } 3412 3413 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3414 ; CHECK-LABEL: test_mm512_add_round_ps_current: 3415 ; CHECK: ## BB#0: 3416 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 3417 ; CHECK-NEXT: retq 3418 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) 3419 ret <16 x float> %res 3420 } 3421 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 3422 3423 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3424 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae: 3425 ; CHECK: ## BB#0: 3426 ; CHECK-NEXT: kmovw %edi, %k1 3427 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3428 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3429 ; CHECK-NEXT: retq 3430 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0) 3431 ret <16 x float> %res 3432 } 3433 
define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3434 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae: 3435 ; CHECK: ## BB#0: 3436 ; CHECK-NEXT: kmovw %edi, %k1 3437 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3438 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3439 ; CHECK-NEXT: retq 3440 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1) 3441 ret <16 x float> %res 3442 } 3443 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3444 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae: 3445 ; CHECK: ## BB#0: 3446 ; CHECK-NEXT: kmovw %edi, %k1 3447 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3448 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3449 ; CHECK-NEXT: retq 3450 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2) 3451 ret <16 x float> %res 3452 } 3453 3454 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3455 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae: 3456 ; CHECK: ## BB#0: 3457 ; CHECK-NEXT: kmovw %edi, %k1 3458 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3459 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3460 ; CHECK-NEXT: retq 3461 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3) 3462 ret <16 x float> %res 3463 } 3464 3465 3466 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3467 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_current: 3468 ; CHECK: ## BB#0: 3469 ; CHECK-NEXT: kmovw %edi, %k1 3470 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} 3471 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3472 ; CHECK-NEXT: retq 
3473 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) 3474 ret <16 x float> %res 3475 } 3476 3477 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3478 ; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae: 3479 ; CHECK: ## BB#0: 3480 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 3481 ; CHECK-NEXT: retq 3482 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0) 3483 ret <16 x float> %res 3484 } 3485 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3486 ; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae: 3487 ; CHECK: ## BB#0: 3488 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 3489 ; CHECK-NEXT: retq 3490 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1) 3491 ret <16 x float> %res 3492 } 3493 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3494 ; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae: 3495 ; CHECK: ## BB#0: 3496 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 3497 ; CHECK-NEXT: retq 3498 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2) 3499 ret <16 x float> %res 3500 } 3501 3502 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3503 ; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae: 3504 ; CHECK: ## BB#0: 3505 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 3506 ; CHECK-NEXT: retq 3507 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3) 3508 ret <16 x float> %res 3509 } 3510 3511 define <16 x float> @test_mm512_sub_round_ps_current(<16 
x float> %a0, <16 x float> %a1, i16 %mask) { 3512 ; CHECK-LABEL: test_mm512_sub_round_ps_current: 3513 ; CHECK: ## BB#0: 3514 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 3515 ; CHECK-NEXT: retq 3516 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) 3517 ret <16 x float> %res 3518 } 3519 3520 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3521 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae: 3522 ; CHECK: ## BB#0: 3523 ; CHECK-NEXT: kmovw %edi, %k1 3524 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3525 ; CHECK-NEXT: retq 3526 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0) 3527 ret <16 x float> %res 3528 } 3529 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3530 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae: 3531 ; CHECK: ## BB#0: 3532 ; CHECK-NEXT: kmovw %edi, %k1 3533 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3534 ; CHECK-NEXT: retq 3535 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1) 3536 ret <16 x float> %res 3537 } 3538 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3539 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae: 3540 ; CHECK: ## BB#0: 3541 ; CHECK-NEXT: kmovw %edi, %k1 3542 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3543 ; CHECK-NEXT: retq 3544 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2) 3545 ret <16 x float> %res 3546 } 3547 3548 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3549 ; 
CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae: 3550 ; CHECK: ## BB#0: 3551 ; CHECK-NEXT: kmovw %edi, %k1 3552 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3553 ; CHECK-NEXT: retq 3554 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3) 3555 ret <16 x float> %res 3556 } 3557 3558 3559 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3560 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_current: 3561 ; CHECK: ## BB#0: 3562 ; CHECK-NEXT: kmovw %edi, %k1 3563 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} 3564 ; CHECK-NEXT: retq 3565 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) 3566 ret <16 x float> %res 3567 } 3568 3569 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3570 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae: 3571 ; CHECK: ## BB#0: 3572 ; CHECK-NEXT: kmovw %edi, %k1 3573 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3574 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3575 ; CHECK-NEXT: retq 3576 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0) 3577 ret <16 x float> %res 3578 } 3579 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3580 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae: 3581 ; CHECK: ## BB#0: 3582 ; CHECK-NEXT: kmovw %edi, %k1 3583 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3584 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3585 ; CHECK-NEXT: retq 3586 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1) 3587 ret <16 x float> %res 3588 } 3589 define <16 x float> 
@test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3590 ; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae: 3591 ; CHECK: ## BB#0: 3592 ; CHECK-NEXT: kmovw %edi, %k1 3593 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3594 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3595 ; CHECK-NEXT: retq 3596 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2) 3597 ret <16 x float> %res 3598 } 3599 3600 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3601 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae: 3602 ; CHECK: ## BB#0: 3603 ; CHECK-NEXT: kmovw %edi, %k1 3604 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3605 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3606 ; CHECK-NEXT: retq 3607 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3) 3608 ret <16 x float> %res 3609 } 3610 3611 3612 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3613 ; CHECK-LABEL: test_mm512_mask_div_round_ps_current: 3614 ; CHECK: ## BB#0: 3615 ; CHECK-NEXT: kmovw %edi, %k1 3616 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} 3617 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3618 ; CHECK-NEXT: retq 3619 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) 3620 ret <16 x float> %res 3621 } 3622 3623 3624 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3625 ; CHECK-LABEL: test_mm512_div_round_ps_rn_sae: 3626 ; CHECK: ## BB#0: 3627 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 3628 ; CHECK-NEXT: retq 3629 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x 
float>zeroinitializer, i16 -1, i32 0) 3630 ret <16 x float> %res 3631 } 3632 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3633 ; CHECK-LABEL: test_mm512_div_round_ps_rd_sae: 3634 ; CHECK: ## BB#0: 3635 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 3636 ; CHECK-NEXT: retq 3637 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1) 3638 ret <16 x float> %res 3639 } 3640 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3641 ; CHECK-LABEL: test_mm512_div_round_ps_ru_sae: 3642 ; CHECK: ## BB#0: 3643 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 3644 ; CHECK-NEXT: retq 3645 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2) 3646 ret <16 x float> %res 3647 } 3648 3649 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3650 ; CHECK-LABEL: test_mm512_div_round_ps_rz_sae: 3651 ; CHECK: ## BB#0: 3652 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 3653 ; CHECK-NEXT: retq 3654 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3) 3655 ret <16 x float> %res 3656 } 3657 3658 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3659 ; CHECK-LABEL: test_mm512_div_round_ps_current: 3660 ; CHECK: ## BB#0: 3661 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 3662 ; CHECK-NEXT: retq 3663 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) 3664 ret <16 x float> %res 3665 } 3666 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 3667 3668 define <16 x float> 
@test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3669 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae: 3670 ; CHECK: ## BB#0: 3671 ; CHECK-NEXT: kmovw %edi, %k1 3672 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3673 ; CHECK-NEXT: retq 3674 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8) 3675 ret <16 x float> %res 3676 } 3677 3678 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3679 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_current: 3680 ; CHECK: ## BB#0: 3681 ; CHECK-NEXT: kmovw %edi, %k1 3682 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} 3683 ; CHECK-NEXT: retq 3684 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) 3685 ret <16 x float> %res 3686 } 3687 3688 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3689 ; CHECK-LABEL: test_mm512_mask_min_round_ps_sae: 3690 ; CHECK: ## BB#0: 3691 ; CHECK-NEXT: kmovw %edi, %k1 3692 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} 3693 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3694 ; CHECK-NEXT: retq 3695 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8) 3696 ret <16 x float> %res 3697 } 3698 3699 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3700 ; CHECK-LABEL: test_mm512_mask_min_round_ps_current: 3701 ; CHECK: ## BB#0: 3702 ; CHECK-NEXT: kmovw %edi, %k1 3703 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} 3704 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3705 ; CHECK-NEXT: retq 3706 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) 
3707 ret <16 x float> %res 3708 } 3709 3710 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3711 ; CHECK-LABEL: test_mm512_min_round_ps_sae: 3712 ; CHECK: ## BB#0: 3713 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 3714 ; CHECK-NEXT: retq 3715 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8) 3716 ret <16 x float> %res 3717 } 3718 3719 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3720 ; CHECK-LABEL: test_mm512_min_round_ps_current: 3721 ; CHECK: ## BB#0: 3722 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 3723 ; CHECK-NEXT: retq 3724 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) 3725 ret <16 x float> %res 3726 } 3727 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 3728 3729 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3730 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae: 3731 ; CHECK: ## BB#0: 3732 ; CHECK-NEXT: kmovw %edi, %k1 3733 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3734 ; CHECK-NEXT: retq 3735 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8) 3736 ret <16 x float> %res 3737 } 3738 3739 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3740 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_current: 3741 ; CHECK: ## BB#0: 3742 ; CHECK-NEXT: kmovw %edi, %k1 3743 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} 3744 ; CHECK-NEXT: retq 3745 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) 3746 ret <16 x float> %res 
}

; Merge-masking variants: with a real %src pass-through, the max is computed
; into a copy of %src under %k1 and that copy becomes the result.
define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_max_round_ps_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_max_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

; All-ones mask: no k-register masking should be emitted.
define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

; Scalar single-precision add with embedded rounding.  Only lane 0 is masked,
; so the i8 mask is reduced with "andl $1" before the kmovw.  Rounding
; immediates observed below: 0 = {rn-sae}, 1 = {rd-sae}, 2 = {ru-sae},
; 3 = {rz-sae}, 4 = current rounding mode.
define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_ru:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
  ret <4 x float> %res
}

; Zero-masked scalar add: zeroinitializer pass-through selects the {z} form.
define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_ss_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
  ret <4 x float> %res
}

; All-ones mask: no masking should be emitted.
define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_add_ss_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

; Same matrix of rounding/masking tests for the scalar double add intrinsic.
define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
  ret <2 x double> %res
}

define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_rd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
  ret <2 x double> %res
}

define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_ru:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
  ret <2 x double> %res
}

define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_rz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
  ret <2 x double> %res
}

define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_sd_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
  ret <2 x double> %res
}

define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_add_sd_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
  ret <2 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

; Scalar max: only {sae} (i32 8) and current rounding (i32 4) apply here.
define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_ss_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
  ret <4 x float> %res
}

define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_ss_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
  ret <4 x float> %res
}

define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_max_ss_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxss {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
  ret <4 x float> %res
}

define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_max_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
  ret <4 x float> %res
}
declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_sd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
  ret <2 x double> %res
}

define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_sd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
  ret <2 x double> %res
}

define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_max_sd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxsd {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
  ret <2 x double> %res
}

define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_max_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
  ret <2 x double> %res
}

; Signed int -> scalar FP conversions with embedded {rz-sae} rounding (i32 3).
define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone

define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone

define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone

define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone

; Unsigned int -> scalar FP conversions.  i32 1 selects {rd-sae}; i32 4 is the
; current rounding mode.  The _mem variants load the integer operand through a
; pointer; the no-rounding form may fold the load into the instruction.
define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
{
  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
  ret <4 x float> %res
}

define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl (%rdi), %eax
; CHECK-NEXT:    vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
{
  %b = load i32, i32* %ptr
  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
  ret <4 x float> %res
}

define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtusi2ssl %edi, %xmm0, %xmm0
; CHECK-NEXT:    retq
{
  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
  ret <4 x float> %res
}

define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtusi2ssl (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
{
  %b = load i32, i32* %ptr
  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone

define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
; CHECK-LABEL: _mm_cvt_roundu64_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
{
  %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
  ret <4 x float> %res
}

define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
; CHECK-LABEL: _mm_cvtu64_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtusi2ssq %rdi, %xmm0, %xmm0
; CHECK-NEXT:    retq
{
  %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone

; Note: this intrinsic takes no rounding operand (two arguments only).
define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0
; CHECK-NEXT:    retq
{
  %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone

; NOTE(review): the next two test names look swapped relative to their bodies:
; the non-"round" name uses {rd-sae} (i32 1) while the "_cvt_roundu64_sd" name
; uses the current-rounding form (i32 4).  Kept as-is; verify upstream intent.
define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
{
  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
  ret <2 x double> %res
}

define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtusi2sdq %rdi, %xmm0, %xmm0
; CHECK-NEXT:    retq
{
  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone

; 512-bit integer min/max.  With an all-ones mask the plain instruction is
; expected; the masked tests below each run the intrinsic once masked and once
; unmasked and add the results so both lowerings are exercised.
define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_vpmaxq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
                    <8 x i64>zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_vpminud:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminud %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
                    <16 x i32>zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_vpmaxsd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
                    <16 x i32>zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; The i16 masks move straight to a k-register; the i8 masks of the q-variants
; are first zero-extended with movzbl.
define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmaxud %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpminsq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpminud %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpminud %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpminuq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; vpermi2var: the index operand (%x1) is also the masked-out pass-through, so
; it is copied to a scratch register first.  The masked form here folds the
; memory operand (%x2p) directly into vpermi2d.
define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vmovaps %zmm1, %zmm3
; CHECK-NEXT:    vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
; CHECK-NEXT:    vpaddd %zmm1, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm1, %zmm3
; CHECK-NEXT:    vpermi2pd %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vpermi2pd %zmm2, %zmm0, %zmm1
; CHECK-NEXT:    vaddpd %zmm1, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm1, %zmm3
; CHECK-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm1
; CHECK-NEXT:    vaddps %zmm1, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  %res2 = fadd <16 x
float> %res, %res1 4431 ret <16 x float> %res2 4432 } 4433 4434 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4435 4436 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4437 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512: 4438 ; CHECK: ## BB#0: 4439 ; CHECK-NEXT: movzbl %dil, %eax 4440 ; CHECK-NEXT: kmovw %eax, %k1 4441 ; CHECK-NEXT: vmovaps %zmm1, %zmm3 4442 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1} 4443 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 4444 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0 4445 ; CHECK-NEXT: retq 4446 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4447 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4448 %res2 = add <8 x i64> %res, %res1 4449 ret <8 x i64> %res2 4450 } 4451 4452 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4453 4454 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) { 4455 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512: 4456 ; CHECK: ## BB#0: 4457 ; CHECK-NEXT: kmovw %esi, %k1 4458 ; CHECK-NEXT: vmovaps %zmm1, %zmm2 4459 ; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z} 4460 ; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1 4461 ; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0 4462 ; CHECK-NEXT: retq 4463 %x2 = load <16 x i32>, <16 x i32>* %x2p 4464 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4465 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1) 4466 %res2 = add <16 x i32> %res, %res1 4467 ret <16 x i32> %res2 4468 } 4469 4470 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x 
double>, <8 x double>, i8) 4471 4472 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) { 4473 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512: 4474 ; CHECK: ## BB#0: 4475 ; CHECK-NEXT: movzbl %sil, %eax 4476 ; CHECK-NEXT: kmovw %eax, %k1 4477 ; CHECK-NEXT: vmovaps %zmm1, %zmm2 4478 ; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z} 4479 ; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1 4480 ; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0 4481 ; CHECK-NEXT: retq 4482 %x2s = load double, double* %x2ptr 4483 %x2ins = insertelement <8 x double> undef, double %x2s, i32 0 4484 %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer 4485 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) 4486 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1) 4487 %res2 = fadd <8 x double> %res, %res1 4488 ret <8 x double> %res2 4489 } 4490 4491 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) 4492 4493 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 4494 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512: 4495 ; CHECK: ## BB#0: 4496 ; CHECK-NEXT: kmovw %edi, %k1 4497 ; CHECK-NEXT: vmovaps %zmm1, %zmm3 4498 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z} 4499 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 4500 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 4501 ; CHECK-NEXT: retq 4502 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) 4503 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) 4504 %res2 = fadd <16 x float> %res, %res1 4505 ret <16 x 
float> %res2 4506 } 4507 4508 4509 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4510 4511 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4512 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512: 4513 ; CHECK: ## BB#0: 4514 ; CHECK-NEXT: movzbl %dil, %eax 4515 ; CHECK-NEXT: kmovw %eax, %k1 4516 ; CHECK-NEXT: vmovaps %zmm1, %zmm3 4517 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z} 4518 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 4519 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0 4520 ; CHECK-NEXT: retq 4521 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4522 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4523 %res2 = add <8 x i64> %res, %res1 4524 ret <8 x i64> %res2 4525 } 4526 4527 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4528 4529 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4530 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512: 4531 ; CHECK: ## BB#0: 4532 ; CHECK-NEXT: kmovw %edi, %k1 4533 ; CHECK-NEXT: vmovaps %zmm1, %zmm3 4534 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1} 4535 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 4536 ; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0 4537 ; CHECK-NEXT: retq 4538 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4539 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 4540 %res2 = add <16 x i32> %res, %res1 4541 ret <16 x i32> %res2 4542 } 4543 4544 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) 4545 define <8 x 
double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { 4546 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_512: 4547 ; CHECK: ## BB#0: 4548 ; CHECK-NEXT: movzbl %dil, %eax 4549 ; CHECK-NEXT: kmovw %eax, %k1 4550 ; CHECK-NEXT: vscalefpd {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 4551 ; CHECK-NEXT: vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0 4552 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 4553 ; CHECK-NEXT: retq 4554 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3) 4555 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) 4556 %res2 = fadd <8 x double> %res, %res1 4557 ret <8 x double> %res2 4558 } 4559 4560 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 4561 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 4562 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_512: 4563 ; CHECK: ## BB#0: 4564 ; CHECK-NEXT: kmovw %edi, %k1 4565 ; CHECK-NEXT: vscalefps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 4566 ; CHECK-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0 4567 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 4568 ; CHECK-NEXT: retq 4569 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2) 4570 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 4571 %res2 = fadd <16 x float> %res, %res1 4572 ret <16 x float> %res2 4573 } 4574 4575 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8) 4576 4577 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { 4578 ; CHECK-LABEL: 
test_int_x86_avx512_mask_unpckh_pd_512: 4579 ; CHECK: ## BB#0: 4580 ; CHECK-NEXT: movzbl %dil, %eax 4581 ; CHECK-NEXT: kmovw %eax, %k1 4582 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7] 4583 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 4584 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 4585 ; CHECK-NEXT: retq 4586 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) 4587 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) 4588 %res2 = fadd <8 x double> %res, %res1 4589 ret <8 x double> %res2 4590 } 4591 4592 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 4593 4594 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 4595 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512: 4596 ; CHECK: ## BB#0: 4597 ; CHECK-NEXT: kmovw %edi, %k1 4598 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15] 4599 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 4600 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 4601 ; CHECK-NEXT: retq 4602 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) 4603 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) 4604 %res2 = fadd <16 x float> %res, %res1 4605 ret <16 x float> %res2 4606 } 4607 4608 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8) 4609 4610 define <8 
x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { 4611 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512: 4612 ; CHECK: ## BB#0: 4613 ; CHECK-NEXT: movzbl %dil, %eax 4614 ; CHECK-NEXT: kmovw %eax, %k1 4615 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6] 4616 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 4617 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 4618 ; CHECK-NEXT: retq 4619 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) 4620 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) 4621 %res2 = fadd <8 x double> %res, %res1 4622 ret <8 x double> %res2 4623 } 4624 4625 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 4626 4627 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 4628 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512: 4629 ; CHECK: ## BB#0: 4630 ; CHECK-NEXT: kmovw %edi, %k1 4631 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13] 4632 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 4633 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 4634 ; CHECK-NEXT: retq 4635 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) 4636 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) 4637 %res2 = fadd <16 x float> %res, %res1 4638 ret <16 x float> %res2 4639 } 4640 
4641 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4642 4643 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4644 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512: 4645 ; CHECK: ## BB#0: 4646 ; CHECK-NEXT: movzbl %dil, %eax 4647 ; CHECK-NEXT: kmovw %eax, %k1 4648 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6] 4649 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm3 = k1[0],zmm0[0],k1[2],zmm0[2],k1[4],zmm0[4],k1[6],zmm0[6] 4650 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 4651 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4652 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 4653 ; CHECK-NEXT: retq 4654 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4655 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4656 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3) 4657 %res3 = add <8 x i64> %res, %res1 4658 %res4 = add <8 x i64> %res2, %res3 4659 ret <8 x i64> %res4 4660 } 4661 4662 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4663 4664 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4665 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512: 4666 ; CHECK: ## BB#0: 4667 ; CHECK-NEXT: movzbl %dil, %eax 4668 ; CHECK-NEXT: kmovw %eax, %k1 4669 ; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7] 4670 ; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 4671 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4672 ; CHECK-NEXT: retq 4673 %res = call <8 x i64> 
@llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4674 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4675 %res2 = add <8 x i64> %res, %res1 4676 ret <8 x i64> %res2 4677 } 4678 4679 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4680 4681 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4682 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512: 4683 ; CHECK: ## BB#0: 4684 ; CHECK-NEXT: kmovw %edi, %k1 4685 ; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15] 4686 ; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 4687 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4688 ; CHECK-NEXT: retq 4689 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4690 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 4691 %res2 = add <16 x i32> %res, %res1 4692 ret <16 x i32> %res2 4693 } 4694 4695 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4696 4697 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4698 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512: 4699 ; CHECK: ## BB#0: 4700 ; CHECK-NEXT: kmovw %edi, %k1 4701 ; CHECK-NEXT: vpunpckldq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13] 4702 ; CHECK-NEXT: vpunpckldq {{.*#+}} zmm0 = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 4703 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4704 ; CHECK-NEXT: retq 4705 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4706 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 4707 %res2 = add <16 x i32> %res, %res1 4708 ret <16 x i32> %res2 4709 } 4710 4711 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8) 4712 4713 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 4714 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512: 4715 ; CHECK: ## BB#0: 4716 ; CHECK-NEXT: kmovw %edi, %k1 4717 ; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1} 4718 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z} 4719 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0 4720 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4721 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4722 ; CHECK-NEXT: retq 4723 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 4724 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 4725 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 4726 %res3 = add <16 x i8> %res0, %res1 4727 %res4 = add <16 x i8> %res3, %res2 4728 ret <16 x i8> %res4 4729 } 4730 4731 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8) 4732 4733 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4734 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512: 4735 ; CHECK: ## BB#0: 4736 ; CHECK-NEXT: movzbl %sil, %eax 4737 ; CHECK-NEXT: kmovw %eax, %k1 4738 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi) 4739 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi) {%k1} 4740 ; CHECK-NEXT: retq 4741 call void 
@llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4742 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4743 ret void 4744 } 4745 4746 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8) 4747 4748 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 4749 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512: 4750 ; CHECK: ## BB#0: 4751 ; CHECK-NEXT: kmovw %edi, %k1 4752 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1} 4753 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z} 4754 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 4755 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4756 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4757 ; CHECK-NEXT: retq 4758 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 4759 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 4760 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 4761 %res3 = add <16 x i8> %res0, %res1 4762 %res4 = add <16 x i8> %res3, %res2 4763 ret <16 x i8> %res4 4764 } 4765 4766 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8) 4767 4768 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4769 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512: 4770 ; CHECK: ## BB#0: 4771 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) 4772 ; CHECK-NEXT: kmovw %esi, %k1 4773 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1} 4774 ; CHECK-NEXT: retq 4775 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4776 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4777 ret void 4778 } 4779 4780 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8) 4781 4782 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 4783 ; 
CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512: 4784 ; CHECK: ## BB#0: 4785 ; CHECK-NEXT: kmovw %edi, %k1 4786 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1} 4787 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z} 4788 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 4789 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4790 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4791 ; CHECK-NEXT: retq 4792 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 4793 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 4794 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 4795 %res3 = add <16 x i8> %res0, %res1 4796 %res4 = add <16 x i8> %res3, %res2 4797 ret <16 x i8> %res4 4798 } 4799 4800 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8) 4801 4802 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4803 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512: 4804 ; CHECK: ## BB#0: 4805 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) 4806 ; CHECK-NEXT: kmovw %esi, %k1 4807 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1} 4808 ; CHECK-NEXT: retq 4809 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4810 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4811 ret void 4812 } 4813 4814 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8) 4815 4816 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 4817 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512: 4818 ; CHECK: ## BB#0: 4819 ; CHECK-NEXT: movzbl %dil, %eax 4820 ; CHECK-NEXT: kmovw %eax, %k1 4821 ; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1} 4822 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z} 4823 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0 4824 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 4825 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 4826 
; CHECK-NEXT: retq 4827 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 4828 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 4829 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 4830 %res3 = add <8 x i16> %res0, %res1 4831 %res4 = add <8 x i16> %res3, %res2 4832 ret <8 x i16> %res4 4833 } 4834 4835 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8) 4836 4837 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4838 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512: 4839 ; CHECK: ## BB#0: 4840 ; CHECK-NEXT: movzbl %sil, %eax 4841 ; CHECK-NEXT: kmovw %eax, %k1 4842 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi) 4843 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi) {%k1} 4844 ; CHECK-NEXT: retq 4845 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4846 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4847 ret void 4848 } 4849 4850 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8) 4851 4852 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 4853 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512: 4854 ; CHECK: ## BB#0: 4855 ; CHECK-NEXT: movzbl %dil, %eax 4856 ; CHECK-NEXT: kmovw %eax, %k1 4857 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1} 4858 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z} 4859 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 4860 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 4861 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 4862 ; CHECK-NEXT: retq 4863 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 4864 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 4865 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 
%x2) 4866 %res3 = add <8 x i16> %res0, %res1 4867 %res4 = add <8 x i16> %res3, %res2 4868 ret <8 x i16> %res4 4869 } 4870 4871 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8) 4872 4873 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4874 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512: 4875 ; CHECK: ## BB#0: 4876 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) 4877 ; CHECK-NEXT: kmovw %esi, %k1 4878 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1} 4879 ; CHECK-NEXT: retq 4880 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4881 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4882 ret void 4883 } 4884 4885 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8) 4886 4887 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 4888 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512: 4889 ; CHECK: ## BB#0: 4890 ; CHECK-NEXT: movzbl %dil, %eax 4891 ; CHECK-NEXT: kmovw %eax, %k1 4892 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1} 4893 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z} 4894 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0 4895 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 4896 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 4897 ; CHECK-NEXT: retq 4898 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 4899 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 4900 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 4901 %res3 = add <8 x i16> %res0, %res1 4902 %res4 = add <8 x i16> %res3, %res2 4903 ret <8 x i16> %res4 4904 } 4905 4906 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8) 4907 4908 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4909 ; CHECK-LABEL: 
test_int_x86_avx512_mask_pmovus_qw_mem_512: 4910 ; CHECK: ## BB#0: 4911 ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) 4912 ; CHECK-NEXT: kmovw %esi, %k1 4913 ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1} 4914 ; CHECK-NEXT: retq 4915 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4916 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4917 ret void 4918 } 4919 4920 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8) 4921 4922 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { 4923 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512: 4924 ; CHECK: ## BB#0: 4925 ; CHECK-NEXT: movzbl %dil, %eax 4926 ; CHECK-NEXT: kmovw %eax, %k1 4927 ; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1} 4928 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z} 4929 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0 4930 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 4931 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 4932 ; CHECK-NEXT: retq 4933 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) 4934 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) 4935 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) 4936 %res3 = add <8 x i32> %res0, %res1 4937 %res4 = add <8 x i32> %res3, %res2 4938 ret <8 x i32> %res4 4939 } 4940 4941 declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8) 4942 4943 define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4944 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512: 4945 ; CHECK: ## BB#0: 4946 ; CHECK-NEXT: movzbl %sil, %eax 4947 ; CHECK-NEXT: kmovw %eax, %k1 4948 ; CHECK-NEXT: vpmovqd %zmm0, (%rdi) 4949 ; CHECK-NEXT: vpmovqd %zmm0, (%rdi) {%k1} 4950 ; CHECK-NEXT: retq 4951 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4952 call void 
@llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4953 ret void 4954 } 4955 4956 declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8) 4957 4958 define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { 4959 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512: 4960 ; CHECK: ## BB#0: 4961 ; CHECK-NEXT: movzbl %dil, %eax 4962 ; CHECK-NEXT: kmovw %eax, %k1 4963 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1} 4964 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z} 4965 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0 4966 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 4967 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 4968 ; CHECK-NEXT: retq 4969 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) 4970 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) 4971 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) 4972 %res3 = add <8 x i32> %res0, %res1 4973 %res4 = add <8 x i32> %res3, %res2 4974 ret <8 x i32> %res4 4975 } 4976 4977 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8) 4978 4979 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4980 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512: 4981 ; CHECK: ## BB#0: 4982 ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) 4983 ; CHECK-NEXT: kmovw %esi, %k1 4984 ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1} 4985 ; CHECK-NEXT: retq 4986 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4987 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4988 ret void 4989 } 4990 4991 declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8) 4992 4993 define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { 4994 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512: 4995 ; 
CHECK: ## BB#0: 4996 ; CHECK-NEXT: movzbl %dil, %eax 4997 ; CHECK-NEXT: kmovw %eax, %k1 4998 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1} 4999 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z} 5000 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0 5001 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 5002 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 5003 ; CHECK-NEXT: retq 5004 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) 5005 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) 5006 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) 5007 %res3 = add <8 x i32> %res0, %res1 5008 %res4 = add <8 x i32> %res3, %res2 5009 ret <8 x i32> %res4 5010 } 5011 5012 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8) 5013 5014 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 5015 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512: 5016 ; CHECK: ## BB#0: 5017 ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) 5018 ; CHECK-NEXT: kmovw %esi, %k1 5019 ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1} 5020 ; CHECK-NEXT: retq 5021 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 5022 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 5023 ret void 5024 } 5025 5026 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16) 5027 5028 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { 5029 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512: 5030 ; CHECK: ## BB#0: 5031 ; CHECK-NEXT: kmovw %edi, %k1 5032 ; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1} 5033 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z} 5034 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0 5035 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 5036 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5037 ; CHECK-NEXT: retq 5038 %res0 = call <16 x i8> 
@llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) 5039 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) 5040 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) 5041 %res3 = add <16 x i8> %res0, %res1 5042 %res4 = add <16 x i8> %res3, %res2 5043 ret <16 x i8> %res4 5044 } 5045 5046 declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16) 5047 5048 define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5049 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512: 5050 ; CHECK: ## BB#0: 5051 ; CHECK-NEXT: kmovw %esi, %k1 5052 ; CHECK-NEXT: vpmovdb %zmm0, (%rdi) 5053 ; CHECK-NEXT: vpmovdb %zmm0, (%rdi) {%k1} 5054 ; CHECK-NEXT: retq 5055 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5056 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5057 ret void 5058 } 5059 5060 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16) 5061 5062 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { 5063 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512: 5064 ; CHECK: ## BB#0: 5065 ; CHECK-NEXT: kmovw %edi, %k1 5066 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1} 5067 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z} 5068 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0 5069 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 5070 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5071 ; CHECK-NEXT: retq 5072 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) 5073 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) 5074 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) 5075 %res3 = add <16 x i8> %res0, %res1 5076 %res4 = add <16 x i8> %res3, %res2 5077 ret 
<16 x i8> %res4 5078 } 5079 5080 declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16) 5081 5082 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5083 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512: 5084 ; CHECK: ## BB#0: 5085 ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) 5086 ; CHECK-NEXT: kmovw %esi, %k1 5087 ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1} 5088 ; CHECK-NEXT: retq 5089 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5090 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5091 ret void 5092 } 5093 5094 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16) 5095 5096 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { 5097 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512: 5098 ; CHECK: ## BB#0: 5099 ; CHECK-NEXT: kmovw %edi, %k1 5100 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1} 5101 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z} 5102 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0 5103 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 5104 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5105 ; CHECK-NEXT: retq 5106 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) 5107 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) 5108 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) 5109 %res3 = add <16 x i8> %res0, %res1 5110 %res4 = add <16 x i8> %res3, %res2 5111 ret <16 x i8> %res4 5112 } 5113 5114 declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16) 5115 5116 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5117 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512: 5118 ; CHECK: ## BB#0: 5119 ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) 5120 ; CHECK-NEXT: 
kmovw %esi, %k1 5121 ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1} 5122 ; CHECK-NEXT: retq 5123 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5124 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5125 ret void 5126 } 5127 5128 declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16) 5129 5130 define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { 5131 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512: 5132 ; CHECK: ## BB#0: 5133 ; CHECK-NEXT: kmovw %edi, %k1 5134 ; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1} 5135 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z} 5136 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0 5137 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 5138 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 5139 ; CHECK-NEXT: retq 5140 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) 5141 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) 5142 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) 5143 %res3 = add <16 x i16> %res0, %res1 5144 %res4 = add <16 x i16> %res3, %res2 5145 ret <16 x i16> %res4 5146 } 5147 5148 declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16) 5149 5150 define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5151 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512: 5152 ; CHECK: ## BB#0: 5153 ; CHECK-NEXT: kmovw %esi, %k1 5154 ; CHECK-NEXT: vpmovdw %zmm0, (%rdi) 5155 ; CHECK-NEXT: vpmovdw %zmm0, (%rdi) {%k1} 5156 ; CHECK-NEXT: retq 5157 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5158 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5159 ret void 5160 } 5161 5162 declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, 
i16) 5163 5164 define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { 5165 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512: 5166 ; CHECK: ## BB#0: 5167 ; CHECK-NEXT: kmovw %edi, %k1 5168 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1} 5169 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z} 5170 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0 5171 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 5172 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 5173 ; CHECK-NEXT: retq 5174 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) 5175 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) 5176 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) 5177 %res3 = add <16 x i16> %res0, %res1 5178 %res4 = add <16 x i16> %res3, %res2 5179 ret <16 x i16> %res4 5180 } 5181 5182 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16) 5183 5184 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5185 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512: 5186 ; CHECK: ## BB#0: 5187 ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) 5188 ; CHECK-NEXT: kmovw %esi, %k1 5189 ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1} 5190 ; CHECK-NEXT: retq 5191 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5192 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5193 ret void 5194 } 5195 5196 declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16) 5197 5198 define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { 5199 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512: 5200 ; CHECK: ## BB#0: 5201 ; CHECK-NEXT: kmovw %edi, %k1 5202 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1} 5203 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z} 5204 ; CHECK-NEXT: 
vpmovusdw %zmm0, %ymm0 5205 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 5206 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 5207 ; CHECK-NEXT: retq 5208 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) 5209 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) 5210 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) 5211 %res3 = add <16 x i16> %res0, %res1 5212 %res4 = add <16 x i16> %res3, %res2 5213 ret <16 x i16> %res4 5214 } 5215 5216 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16) 5217 5218 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5219 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512: 5220 ; CHECK: ## BB#0: 5221 ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) 5222 ; CHECK-NEXT: kmovw %esi, %k1 5223 ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1} 5224 ; CHECK-NEXT: retq 5225 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5226 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5227 ret void 5228 } 5229 5230 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) 5231 5232 define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { 5233 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512: 5234 ; CHECK: ## BB#0: 5235 ; CHECK-NEXT: movzbl %dil, %eax 5236 ; CHECK-NEXT: kmovw %eax, %k1 5237 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} 5238 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 5239 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5240 ; CHECK-NEXT: retq 5241 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) 5242 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) 5243 %res2 = fadd <8 x double> %res, %res1 5244 ret 
<8 x double> %res2 5245 } 5246 5247 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32) 5248 5249 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { 5250 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512: 5251 ; CHECK: ## BB#0: 5252 ; CHECK-NEXT: kmovw %edi, %k1 5253 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} 5254 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 5255 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 5256 ; CHECK-NEXT: retq 5257 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) 5258 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) 5259 %res2 = fadd <16 x float> %res, %res1 5260 ret <16 x float> %res2 5261 } 5262 5263 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32) 5264 5265 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 5266 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512: 5267 ; CHECK: ## BB#0: 5268 ; CHECK-NEXT: movzbl %dil, %eax 5269 ; CHECK-NEXT: kmovw %eax, %k1 5270 ; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1} 5271 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0 5272 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 5273 ; CHECK-NEXT: retq 5274 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) 5275 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0) 5276 %res2 = add <8 x i32> %res, %res1 5277 ret <8 x i32> %res2 5278 } 5279 5280 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32) 5281 5282 define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) { 5283 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512: 5284 ; CHECK: ## BB#0: 5285 ; CHECK-NEXT: movzbl %dil, %eax 5286 
; CHECK-NEXT: kmovw %eax, %k1 5287 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1} 5288 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0 5289 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 5290 ; CHECK-NEXT: retq 5291 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4) 5292 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2) 5293 %res2 = fadd <8 x float> %res, %res1 5294 ret <8 x float> %res2 5295 } 5296 5297 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32) 5298 5299 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 5300 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512: 5301 ; CHECK: ## BB#0: 5302 ; CHECK-NEXT: movzbl %dil, %eax 5303 ; CHECK-NEXT: kmovw %eax, %k1 5304 ; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1} 5305 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0 5306 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 5307 ; CHECK-NEXT: retq 5308 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2) 5309 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0) 5310 %res2 = add <8 x i32> %res, %res1 5311 ret <8 x i32> %res2 5312 } 5313 5314 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32) 5315 5316 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 5317 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512: 5318 ; CHECK: ## BB#0: 5319 ; CHECK-NEXT: kmovw %edi, %k1 5320 ; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1} 5321 ; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0 5322 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5323 ; CHECK-NEXT: retq 5324 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2) 5325 %res1 = call <16 x i32> 
@llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0) 5326 %res2 = add <16 x i32> %res, %res1 5327 ret <16 x i32> %res2 5328 } 5329 5330 declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32) 5331 5332 define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) { 5333 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512: 5334 ; CHECK: ## BB#0: 5335 ; CHECK-NEXT: movzbl %dil, %eax 5336 ; CHECK-NEXT: kmovw %eax, %k1 5337 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1} 5338 ; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0 5339 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5340 ; CHECK-NEXT: retq 5341 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4) 5342 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8) 5343 %res2 = fadd <8 x double> %res, %res1 5344 ret <8 x double> %res2 5345 } 5346 5347 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32) 5348 5349 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 5350 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512: 5351 ; CHECK: ## BB#0: 5352 ; CHECK-NEXT: kmovw %edi, %k1 5353 ; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1} 5354 ; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0 5355 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5356 ; CHECK-NEXT: retq 5357 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2) 5358 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0) 5359 %res2 = add <16 x i32> %res, %res1 5360 ret <16 x i32> %res2 5361 } 5362 5363 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32) 5364 5365 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> 
%x0, <8 x i32> %x1, i8 %x2) { 5366 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512: 5367 ; CHECK: ## BB#0: 5368 ; CHECK-NEXT: movzbl %dil, %eax 5369 ; CHECK-NEXT: kmovw %eax, %k1 5370 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1} 5371 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0 5372 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 5373 ; CHECK-NEXT: retq 5374 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) 5375 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) 5376 %res2 = add <8 x i32> %res, %res1 5377 ret <8 x i32> %res2 5378 } 5379 5380 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8) 5381 5382 define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { 5383 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512: 5384 ; CHECK: ## BB#0: 5385 ; CHECK-NEXT: movzbl %dil, %eax 5386 ; CHECK-NEXT: kmovw %eax, %k1 5387 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} 5388 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 5389 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5390 ; CHECK-NEXT: retq 5391 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) 5392 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) 5393 %res2 = fadd <8 x double> %res, %res1 5394 ret <8 x double> %res2 5395 } 5396 5397 5398 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32) 5399 5400 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { 5401 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512: 5402 ; CHECK: ## BB#0: 5403 ; CHECK-NEXT: kmovw %edi, %k1 5404 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} 5405 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 5406 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 5407 ; CHECK-NEXT: retq 5408 %res = call 
<16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) 5409 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) 5410 %res2 = fadd <16 x float> %res, %res1 5411 ret <16 x float> %res2 5412 } 5413 5414 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32) 5415 5416 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 5417 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512: 5418 ; CHECK: ## BB#0: 5419 ; CHECK-NEXT: movzbl %dil, %eax 5420 ; CHECK-NEXT: kmovw %eax, %k1 5421 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1} 5422 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0 5423 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 5424 ; CHECK-NEXT: retq 5425 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) 5426 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) 5427 %res2 = add <8 x i32> %res, %res1 5428 ret <8 x i32> %res2 5429 } 5430 5431 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32) 5432 5433 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 5434 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512: 5435 ; CHECK: ## BB#0: 5436 ; CHECK-NEXT: kmovw %edi, %k1 5437 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1} 5438 ; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0 5439 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5440 ; CHECK-NEXT: retq 5441 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4) 5442 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8) 5443 %res2 = add <16 x i32> %res, %res1 5444 ret <16 x i32> %res2 5445 } 5446 5447 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 
x float>, <16 x i32>, i16, i32) 5448 5449 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 5450 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512: 5451 ; CHECK: ## BB#0: 5452 ; CHECK-NEXT: kmovw %edi, %k1 5453 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1} 5454 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0 5455 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5456 ; CHECK-NEXT: retq 5457 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4) 5458 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8) 5459 %res2 = add <16 x i32> %res, %res1 5460 ret <16 x i32> %res2 5461 } 5462 5463 5464 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) 5465 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { 5466 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss: 5467 ; CHECK: ## BB#0: 5468 ; CHECK-NEXT: andl $1, %edi 5469 ; CHECK-NEXT: kmovw %edi, %k1 5470 ; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm2 {%k1} 5471 ; CHECK-NEXT: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0 5472 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 5473 ; CHECK-NEXT: retq 5474 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4) 5475 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8) 5476 %res2 = fadd <4 x float> %res, %res1 5477 ret <4 x float> %res2 5478 } 5479 5480 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32) 5481 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { 5482 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd: 5483 ; CHECK: ## BB#0: 5484 ; CHECK-NEXT: andl $1, %edi 5485 ; 
CHECK-NEXT: kmovw %edi, %k1 5486 ; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm2 {%k1} 5487 ; CHECK-NEXT: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0 5488 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 5489 ; CHECK-NEXT: retq 5490 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4) 5491 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8) 5492 %res2 = fadd <2 x double> %res, %res1 5493 ret <2 x double> %res2 5494 } 5495 5496 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 5497 5498 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 5499 ; CHECK-LABEL: test_getexp_ss: 5500 ; CHECK: ## BB#0: 5501 ; CHECK-NEXT: andl $1, %edi 5502 ; CHECK-NEXT: kmovw %edi, %k1 5503 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 5504 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1} 5505 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1} 5506 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} 5507 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0 5508 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1 5509 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 5510 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 5511 ; CHECK-NEXT: retq 5512 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 5513 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) 5514 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8) 5515 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8) 5516 5517 %res.1 = fadd <4 x float> %res0, %res1 5518 %res.2 = fadd <4 x float> %res2, %res3 5519 %res = 
fadd <4 x float> %res.1, %res.2 5520 ret <4 x float> %res 5521 } 5522 5523 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 5524 5525 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 5526 ; CHECK-LABEL: test_getexp_sd: 5527 ; CHECK: ## BB#0: 5528 ; CHECK-NEXT: andl $1, %edi 5529 ; CHECK-NEXT: kmovw %edi, %k1 5530 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 5531 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1} 5532 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4 5533 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1} 5534 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 5535 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1 5536 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0 5537 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 5538 ; CHECK-NEXT: retq 5539 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 5540 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) 5541 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8) 5542 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4) 5543 5544 %res.1 = fadd <2 x double> %res0, %res1 5545 %res.2 = fadd <2 x double> %res2, %res3 5546 %res = fadd <2 x double> %res.1, %res.2 5547 ret <2 x double> %res 5548 } 5549 5550 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32) 5551 5552 define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) { 5553 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd: 5554 ; CHECK: ## BB#0: 5555 ; CHECK-NEXT: andl $1, %edi 5556 ; CHECK-NEXT: kmovw %edi, %k1 5557 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1} 
5558 ; CHECK-NEXT: kmovw %k0, %eax 5559 ; CHECK-NEXT: shlb $7, %al 5560 ; CHECK-NEXT: sarb $7, %al 5561 ; CHECK-NEXT: retq 5562 5563 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8) 5564 ret i8 %res4 5565 } 5566 5567 define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) { 5568 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all: 5569 ; CHECK: ## BB#0: 5570 ; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0 5571 ; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1 5572 ; CHECK-NEXT: korw %k0, %k1, %k0 5573 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1 5574 ; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2 5575 ; CHECK-NEXT: korw %k1, %k2, %k1 5576 ; CHECK-NEXT: andl $1, %edi 5577 ; CHECK-NEXT: kmovw %edi, %k2 5578 ; CHECK-NEXT: kandw %k2, %k1, %k1 5579 ; CHECK-NEXT: korw %k1, %k0, %k0 5580 ; CHECK-NEXT: kmovw %k0, %eax 5581 ; CHECK-NEXT: shlb $7, %al 5582 ; CHECK-NEXT: sarb $7, %al 5583 ; CHECK-NEXT: retq 5584 5585 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4) 5586 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8) 5587 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4) 5588 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8) 5589 5590 %res11 = or i8 %res1, %res2 5591 %res12 = or i8 %res3, %res4 5592 %res13 = or i8 %res11, %res12 5593 ret i8 %res13 5594 } 5595 5596 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32) 5597 5598 define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) { 5599 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss: 5600 ; CHECK: ## BB#0: 5601 ; CHECK-NEXT: andl $1, %edi 5602 ; CHECK-NEXT: kmovw %edi, %k1 5603 ; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1} 5604 ; CHECK-NEXT: kmovw %k0, %eax 5605 
; CHECK-NEXT: shlb $7, %al 5606 ; CHECK-NEXT: sarb $7, %al 5607 ; CHECK-NEXT: retq 5608 5609 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4) 5610 ret i8 %res2 5611 } 5612 5613 5614 define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) { 5615 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all: 5616 ; CHECK: ## BB#0: 5617 ; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1 5618 ; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1} 5619 ; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1 5620 ; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1} 5621 ; CHECK-NEXT: andl $1, %edi 5622 ; CHECK-NEXT: kmovw %edi, %k2 5623 ; CHECK-NEXT: kandw %k2, %k1, %k1 5624 ; CHECK-NEXT: kandw %k1, %k0, %k0 5625 ; CHECK-NEXT: kmovw %k0, %eax 5626 ; CHECK-NEXT: shlb $7, %al 5627 ; CHECK-NEXT: sarb $7, %al 5628 ; CHECK-NEXT: retq 5629 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4) 5630 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8) 5631 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4) 5632 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8) 5633 5634 %res11 = and i8 %res1, %res2 5635 %res12 = and i8 %res3, %res4 5636 %res13 = and i8 %res11, %res12 5637 ret i8 %res13 5638 } 5639 5640 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16) 5641 5642 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) { 5643 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4: 5644 ; CHECK: ## BB#0: 5645 ; CHECK-NEXT: kmovw %edi, %k1 5646 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 5647 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 
5648 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 5649 ; CHECK-NEXT: retq 5650 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4) 5651 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1) 5652 %res2 = fadd <16 x float> %res, %res1 5653 ret <16 x float> %res2 5654 } 5655 5656 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8) 5657 5658 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) { 5659 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2: 5660 ; CHECK: ## BB#0: 5661 ; CHECK-NEXT: movzbl %dil, %eax 5662 ; CHECK-NEXT: kmovw %eax, %k1 5663 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5664 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5665 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5666 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 5667 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 5668 ; CHECK-NEXT: retq 5669 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4) 5670 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1) 5671 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4) 5672 5673 %res3 = fadd <8 x double> %res, %res1 5674 %res4 = fadd <8 x double> %res3, %res2 5675 ret <8 x double> %res4 5676 } 5677 5678 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16) 5679 5680 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) { 5681 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4: 5682 ; CHECK: ## 
BB#0: 5683 ; CHECK-NEXT: kmovw %edi, %k1 5684 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 5685 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 5686 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 5687 ; CHECK-NEXT: retq 5688 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4) 5689 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1) 5690 %res2 = add <16 x i32> %res, %res1 5691 ret <16 x i32> %res2 5692 } 5693 5694 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8) 5695 5696 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) { 5697 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2: 5698 ; CHECK: ## BB#0: 5699 ; CHECK-NEXT: movzbl %dil, %eax 5700 ; CHECK-NEXT: kmovw %eax, %k1 5701 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5702 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5703 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 5704 ; CHECK-NEXT: retq 5705 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4) 5706 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1) 5707 %res2 = add <8 x i64> %res, %res1 5708 ret <8 x i64> %res2 5709 } 5710 5711 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32) 5712 5713 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { 5714 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512: 5715 ; CHECK: ## BB#0: 5716 ; CHECK-NEXT: movzbl %dil, %eax 5717 ; CHECK-NEXT: kmovw %eax, %k1 5718 ; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1} 5719 ; CHECK-NEXT: vgetmantpd 
$11,{sae}, %zmm0, %zmm0 5720 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5721 ; CHECK-NEXT: retq 5722 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4) 5723 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8) 5724 %res2 = fadd <8 x double> %res, %res1 5725 ret <8 x double> %res2 5726 } 5727 5728 declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32) 5729 5730 define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { 5731 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512: 5732 ; CHECK: ## BB#0: 5733 ; CHECK-NEXT: kmovw %edi, %k1 5734 ; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1} 5735 ; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0 5736 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 5737 ; CHECK-NEXT: retq 5738 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4) 5739 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8) 5740 %res2 = fadd <16 x float> %res, %res1 5741 ret <16 x float> %res2 5742 } 5743 5744 declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32) 5745 5746 define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 5747 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd: 5748 ; CHECK: ## BB#0: 5749 ; CHECK-NEXT: andl $1, %edi 5750 ; CHECK-NEXT: kmovw %edi, %k1 5751 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 5752 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1} 5753 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z} 5754 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5 5755 ; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1} 5756 ; CHECK-NEXT: vaddpd %xmm4, 
%xmm3, %xmm0 5757 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1 5758 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 5759 ; CHECK-NEXT: retq 5760 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4) 5761 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4) 5762 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8) 5763 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4) 5764 %res11 = fadd <2 x double> %res, %res1 5765 %res12 = fadd <2 x double> %res2, %res3 5766 %res13 = fadd <2 x double> %res11, %res12 5767 ret <2 x double> %res13 5768 } 5769 5770 declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32) 5771 5772 define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 5773 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss: 5774 ; CHECK: ## BB#0: 5775 ; CHECK-NEXT: andl $1, %edi 5776 ; CHECK-NEXT: kmovw %edi, %k1 5777 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1} 5778 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z} 5779 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4 5780 ; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0 5781 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1 5782 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 5783 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 5784 ; CHECK-NEXT: retq 5785 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4) 5786 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4) 5787 %res2 = call <4 x float> 
@llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8) 5788 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4) 5789 %res11 = fadd <4 x float> %res, %res1 5790 %res12 = fadd <4 x float> %res2, %res3 5791 %res13 = fadd <4 x float> %res11, %res12 5792 ret <4 x float> %res13 5793 } 5794 5795 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8) 5796 5797 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) { 5798 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512: 5799 ; CHECK: ## BB#0: 5800 ; CHECK-NEXT: movzbl %dil, %eax 5801 ; CHECK-NEXT: kmovw %eax, %k1 5802 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6] 5803 ; CHECK-NEXT: vshufpd {{.*#+}} zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6] 5804 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6] 5805 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 5806 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 5807 ; CHECK-NEXT: retq 5808 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4) 5809 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1) 5810 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4) 5811 5812 %res3 = fadd <8 x double> %res, %res1 5813 %res4 = fadd <8 x double> %res3, %res2 5814 ret <8 x double> %res4 5815 } 5816 5817 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16) 5818 5819 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x 
float> %x3, i16 %x4) { 5820 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512: 5821 ; CHECK: ## BB#0: 5822 ; CHECK-NEXT: kmovw %edi, %k1 5823 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12] 5824 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12] 5825 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 5826 ; CHECK-NEXT: retq 5827 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4) 5828 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1) 5829 %res2 = fadd <16 x float> %res, %res1 5830 ret <16 x float> %res2 5831 } 5832 5833 declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8) 5834 5835 define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { 5836 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512: 5837 ; CHECK: ## BB#0: 5838 ; CHECK-NEXT: movzbl %dil, %eax 5839 ; CHECK-NEXT: kmovw %eax, %k1 5840 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 = zmm1[0,1,3,2,5,4,6,6] 5841 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = k1[0,1,3,2,5,4,6,6] 5842 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6] 5843 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1 5844 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5845 ; CHECK-NEXT: retq 5846 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3) 5847 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3) 5848 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1) 5849 %res3 = fadd <8 x double> %res, %res1 5850 %res4 = fadd <8 x double> %res3, %res2 5851 ret <8 x double> %res4 5852 } 5853 5854 
declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16) 5855 5856 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { 5857 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512: 5858 ; CHECK: ## BB#0: 5859 ; CHECK-NEXT: kmovw %edi, %k1 5860 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] 5861 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] 5862 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] 5863 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1 5864 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 5865 ; CHECK-NEXT: retq 5866 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3) 5867 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3) 5868 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1) 5869 %res3 = fadd <16 x float> %res, %res1 5870 %res4 = fadd <16 x float> %res3, %res2 5871 ret <16 x float> %res4 5872 } 5873 5874 declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8) 5875 5876 define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { 5877 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512: 5878 ; CHECK: ## BB#0: 5879 ; CHECK-NEXT: movzbl %dil, %eax 5880 ; CHECK-NEXT: kmovw %eax, %k1 5881 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} 5882 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z} 5883 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 5884 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1 5885 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 5886 ; CHECK-NEXT: retq 5887 %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x 
double> %x2, i8 %x3) 5888 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3) 5889 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) 5890 %res3 = fadd <8 x double> %res, %res1 5891 %res4 = fadd <8 x double> %res2, %res3 5892 ret <8 x double> %res4 5893 } 5894 5895 declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16) 5896 5897 define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { 5898 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512: 5899 ; CHECK: ## BB#0: 5900 ; CHECK-NEXT: kmovw %edi, %k1 5901 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} 5902 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z} 5903 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0 5904 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1 5905 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 5906 ; CHECK-NEXT: retq 5907 %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) 5908 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3) 5909 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) 5910 %res3 = fadd <16 x float> %res, %res1 5911 %res4 = fadd <16 x float> %res2, %res3 5912 ret <16 x float> %res4 5913 } 5914 5915 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8) 5916 5917 define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) { 5918 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512: 5919 ; CHECK: ## BB#0: 5920 ; CHECK-NEXT: kmovw %edi, %k1 5921 ; CHECK-NEXT: vinsertf32x4 
$1, %xmm1, %zmm0, %zmm2 {%k1} 5922 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z} 5923 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 5924 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 5925 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 5926 ; CHECK-NEXT: retq 5927 %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4) 5928 %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1) 5929 %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4) 5930 %res3 = fadd <16 x float> %res, %res1 5931 %res4 = fadd <16 x float> %res2, %res3 5932 ret <16 x float> %res4 5933 } 5934 5935 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8) 5936 5937 define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) { 5938 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512: 5939 ; CHECK: ## BB#0: 5940 ; CHECK-NEXT: kmovw %edi, %k1 5941 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} 5942 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z} 5943 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 5944 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 5945 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 5946 ; CHECK-NEXT: retq 5947 %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4) 5948 %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1) 5949 %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4) 5950 %res3 = add <16 x i32> %res, %res1 5951 %res4 = add <16 x i32> %res2, %res3 5952 ret <16 x i32> %res4 5953 } 5954 5955 declare <8 
x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8) 5956 5957 define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) { 5958 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512: 5959 ; CHECK: ## BB#0: 5960 ; CHECK-NEXT: movzbl %dil, %eax 5961 ; CHECK-NEXT: kmovw %eax, %k1 5962 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} 5963 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z} 5964 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 5965 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 5966 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 5967 ; CHECK-NEXT: retq 5968 %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4) 5969 %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1) 5970 %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4) 5971 %res3 = fadd <8 x double> %res, %res1 5972 %res4 = fadd <8 x double> %res2, %res3 5973 ret <8 x double> %res4 5974 } 5975 5976 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8) 5977 5978 define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) { 5979 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512: 5980 ; CHECK: ## BB#0: 5981 ; CHECK-NEXT: movzbl %dil, %eax 5982 ; CHECK-NEXT: kmovw %eax, %k1 5983 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} 5984 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z} 5985 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 5986 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 5987 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 5988 ; CHECK-NEXT: retq 5989 %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> 
%x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4) 5990 %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1) 5991 %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4) 5992 %res3 = add <8 x i64> %res, %res1 5993 %res4 = add <8 x i64> %res2, %res3 5994 ret <8 x i64> %res4 5995 } 5996 5997 declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32) 5998 5999 define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) { 6000 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round: 6001 ; CHECK: ## BB#0: 6002 ; CHECK-NEXT: andl $1, %edi 6003 ; CHECK-NEXT: kmovw %edi, %k1 6004 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1} 6005 ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0 6006 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 6007 ; CHECK-NEXT: retq 6008 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4) 6009 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8) 6010 %res2 = fadd <2 x double> %res, %res1 6011 ret <2 x double> %res2 6012 } 6013 6014 declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32) 6015 6016 define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) { 6017 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round: 6018 ; CHECK: ## BB#0: 6019 ; CHECK-NEXT: andl $1, %edi 6020 ; CHECK-NEXT: kmovw %edi, %k1 6021 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 6022 ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0 6023 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 6024 ; CHECK-NEXT: retq 6025 %res = call <4 x float> 
@llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3) 6026 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8) 6027 %res2 = fadd <4 x float> %res, %res1 6028 ret <4 x float> %res2 6029 } 6030 6031 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) 6032 6033 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { 6034 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512: 6035 ; CHECK: ## BB#0: 6036 ; CHECK-NEXT: kmovw %edi, %k1 6037 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 6038 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} 6039 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 6040 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 6041 ; CHECK-NEXT: retq 6042 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) 6043 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) 6044 %res2 = add <16 x i32> %res, %res1 6045 ret <16 x i32> %res2 6046 } 6047 6048 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) 6049 6050 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { 6051 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512: 6052 ; CHECK: ## BB#0: 6053 ; CHECK-NEXT: kmovw %edi, %k1 6054 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 6055 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z} 6056 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 6057 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 6058 ; CHECK-NEXT: retq 6059 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) 6060 %res1 = call <16 x i32> 
@llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) 6061 %res2 = add <16 x i32> %res, %res1 6062 ret <16 x i32> %res2 6063 } 6064 6065 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8) 6066 6067 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { 6068 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512: 6069 ; CHECK: ## BB#0: 6070 ; CHECK-NEXT: movzbl %dil, %eax 6071 ; CHECK-NEXT: kmovw %eax, %k1 6072 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 6073 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} 6074 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 6075 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 6076 ; CHECK-NEXT: retq 6077 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) 6078 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) 6079 %res2 = add <8 x i64> %res, %res1 6080 ret <8 x i64> %res2 6081 } 6082 6083 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8) 6084 6085 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { 6086 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512: 6087 ; CHECK: ## BB#0: 6088 ; CHECK-NEXT: movzbl %dil, %eax 6089 ; CHECK-NEXT: kmovw %eax, %k1 6090 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 6091 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z} 6092 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 6093 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 6094 ; CHECK-NEXT: retq 6095 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) 6096 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) 6097 %res2 = add <8 x 
i64> %res, %res1 6098 ret <8 x i64> %res2 6099 } 6100 6101 declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16) 6102 6103 define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) { 6104 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512: 6105 ; CHECK: ## BB#0: 6106 ; CHECK-NEXT: kmovw %edi, %k1 6107 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] 6108 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] 6109 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] 6110 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 6111 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 6112 ; CHECK-NEXT: retq 6113 %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2) 6114 %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1) 6115 %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2) 6116 %res3 = fadd <16 x float> %res, %res1 6117 %res4 = fadd <16 x float> %res2, %res3 6118 ret <16 x float> %res4 6119 } 6120 6121 declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16) 6122 6123 define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) { 6124 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512: 6125 ; CHECK: ## BB#0: 6126 ; CHECK-NEXT: kmovw %edi, %k1 6127 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] 6128 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] 6129 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] 6130 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 6131 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 6132 ; CHECK-NEXT: retq 6133 %res = call <16 x float> 
@llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2) 6134 %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1) 6135 %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2) 6136 %res3 = fadd <16 x float> %res, %res1 6137 %res4 = fadd <16 x float> %res2, %res3 6138 ret <16 x float> %res4 6139 } 6140 6141 declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8) 6142 6143 define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) { 6144 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512: 6145 ; CHECK: ## BB#0: 6146 ; CHECK-NEXT: movzbl %dil, %eax 6147 ; CHECK-NEXT: kmovw %eax, %k1 6148 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6] 6149 ; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6] 6150 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6] 6151 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 6152 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 6153 ; CHECK-NEXT: retq 6154 %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2) 6155 %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1) 6156 %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2) 6157 %res3 = fadd <8 x double> %res, %res1 6158 %res4 = fadd <8 x double> %res2, %res3 6159 ret <8 x double> %res4 6160 } 6161 6162 define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { 6163 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae: 6164 ; CHECK: ## BB#0: 6165 ; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0 6166 ; CHECK-NEXT: sete %al 6167 ; CHECK-NEXT: movzbl %al, %eax 6168 ; CHECK-NEXT: retq 6169 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8) 6170 ret i32 %res 6171 } 6172 
6173 define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { 6174 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae: 6175 ; CHECK: ## BB#0: 6176 ; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0 6177 ; CHECK-NEXT: sete %al 6178 ; CHECK-NEXT: movzbl %al, %eax 6179 ; CHECK-NEXT: retq 6180 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8) 6181 ret i32 %res 6182 } 6183 6184 define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) { 6185 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq: 6186 ; CHECK: ## BB#0: 6187 ; CHECK-NEXT: vcomisd %xmm1, %xmm0 6188 ; CHECK-NEXT: sete %al 6189 ; CHECK-NEXT: movzbl %al, %eax 6190 ; CHECK-NEXT: retq 6191 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4) 6192 ret i32 %res 6193 } 6194 6195 define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) { 6196 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq: 6197 ; CHECK: ## BB#0: 6198 ; CHECK-NEXT: vucomisd %xmm1, %xmm0 6199 ; CHECK-NEXT: sete %al 6200 ; CHECK-NEXT: movzbl %al, %eax 6201 ; CHECK-NEXT: retq 6202 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4) 6203 ret i32 %res 6204 } 6205 6206 define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { 6207 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae: 6208 ; CHECK: ## BB#0: 6209 ; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0 6210 ; CHECK-NEXT: sbbl %eax, %eax 6211 ; CHECK-NEXT: andl $1, %eax 6212 ; CHECK-NEXT: retq 6213 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8) 6214 ret i32 %res 6215 } 6216 6217 define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { 6218 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae: 6219 ; CHECK: ## BB#0: 6220 ; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0 6221 ; CHECK-NEXT: sbbl %eax, %eax 6222 ; CHECK-NEXT: andl $1, %eax 6223 ; CHECK-NEXT: retq 6224 %res = call i32 
@llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8) 6225 ret i32 %res 6226 } 6227 6228 define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) { 6229 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt: 6230 ; CHECK: ## BB#0: 6231 ; CHECK-NEXT: vcomisd %xmm1, %xmm0 6232 ; CHECK-NEXT: sbbl %eax, %eax 6233 ; CHECK-NEXT: andl $1, %eax 6234 ; CHECK-NEXT: retq 6235 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4) 6236 ret i32 %res 6237 } 6238 6239 define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) { 6240 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt: 6241 ; CHECK: ## BB#0: 6242 ; CHECK-NEXT: vucomisd %xmm1, %xmm0 6243 ; CHECK-NEXT: sbbl %eax, %eax 6244 ; CHECK-NEXT: andl $1, %eax 6245 ; CHECK-NEXT: retq 6246 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4) 6247 ret i32 %res 6248 } 6249 6250 declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32) 6251 6252 define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) { 6253 ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt: 6254 ; CHECK: ## BB#0: 6255 ; CHECK-NEXT: vucomiss %xmm1, %xmm0 6256 ; CHECK-NEXT: sbbl %eax, %eax 6257 ; CHECK-NEXT: andl $1, %eax 6258 ; CHECK-NEXT: retq 6259 %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4) 6260 ret i32 %res 6261 } 6262 6263 declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32) 6264 declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8) 6265 6266 define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 6267 ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk: 6268 ; CHECK: ## BB#0: 6269 ; CHECK-NEXT: andl $1, %edi 6270 ; CHECK-NEXT: kmovw %edi, %k1 6271 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1} 6272 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 6273 ; CHECK-NEXT: retq 
6274 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 6275 ret <4 x float> %res 6276 } 6277 6278 define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 6279 ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz: 6280 ; CHECK: ## BB#0: 6281 ; CHECK-NEXT: andl $1, %edi 6282 ; CHECK-NEXT: kmovw %edi, %k1 6283 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} 6284 ; CHECK-NEXT: retq 6285 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2) 6286 ret <4 x float> %res 6287 } 6288 6289 define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 6290 ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr: 6291 ; CHECK: ## BB#0: 6292 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 6293 ; CHECK-NEXT: retq 6294 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1) 6295 ret <4 x float> %res 6296 } 6297 6298 declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8) 6299 define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) { 6300 ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr: 6301 ; CHECK: ## BB#0: 6302 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 6303 ; CHECK-NEXT: retq 6304 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1) 6305 ret <2 x double> %res 6306 } 6307 6308 define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) { 6309 ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz: 6310 ; CHECK: ## BB#0: 6311 ; CHECK-NEXT: andl $1, %edi 6312 ; CHECK-NEXT: kmovw %edi, %k1 6313 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} 6314 ; CHECK-NEXT: retq 6315 %res = call <2 x double> 
@llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2) 6316 ret <2 x double> %res 6317 } 6318 6319 define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 6320 ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk: 6321 ; CHECK: ## BB#0: 6322 ; CHECK-NEXT: andl $1, %edi 6323 ; CHECK-NEXT: kmovw %edi, %k1 6324 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1} 6325 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 6326 ; CHECK-NEXT: retq 6327 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 6328 ret <2 x double> %res 6329 } 6330 6331