; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s


define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcompresspd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_compress_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcompresspd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_compress_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_compress_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)

define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcompressps %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_compress_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcompressps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_compress_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_compress_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)

define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpcompressq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_compress_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpcompressq %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_compress_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)

define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpcompressd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_compress_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpcompressd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_compress_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)

define <8 x double> @test_expand_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_expand_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
  ret <8 x double> %res
}

define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_expand_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vexpandpd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)

define <16 x float> @test_expand_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_expand_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_expand_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vexpandps %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)

define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_expand_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_expand_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpexpandq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)

define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_expand_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_expand_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpexpandd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)

define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_rcp_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone

declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)

define <2 x double> @test_rndscale_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_rndscale_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundsd $11, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 11, i32 4)
  ret <2 x double>%res
}

define <2 x double> @test_rndscale_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: test_rndscale_sd_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrndscalesd $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
  ret <2 x double>%res
}

define <2 x double> @test_rndscale_sd_mask_load(<2 x double> %a, <2 x double>* %bptr, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: test_rndscale_sd_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vrndscalesd $11, (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %b = load <2 x double>, <2 x double>* %bptr
  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
  ret <2 x double>%res
}

define <2 x double> @test_rndscale_sd_maskz(<2 x double> %a, <2 x double> %b, i8 %mask) {
; CHECK-LABEL: test_rndscale_sd_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrndscalesd $11, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> zeroinitializer, i8 %mask, i32 11, i32 4)
  ret <2 x double>%res
}

declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)

define <4 x float> @test_rndscale_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_rndscale_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundss $11, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
  ret <4 x float>%res
}

define <4 x float> @test_rndscale_ss_load(<4 x float> %a, <4 x float>* %bptr) {
; CHECK-LABEL: test_rndscale_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundss $11, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = load <4 x float>, <4 x float>* %bptr
  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
  ret <4 x float>%res
}

define <4 x float> @test_rndscale_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: test_rndscale_ss_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrndscaless $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 11, i32 4)
  ret <4 x float>%res
}

define <4 x float> @test_rndscale_ss_maskz(<4 x float> %a, <4 x float> %b, i8 %mask) {
; CHECK-LABEL: test_rndscale_ss_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrndscaless $11, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask, i32 11, i32 4)
  ret <4 x float>%res
}

declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test7(<8 x double> %a) {
; CHECK-LABEL: test7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
  ret <8 x double>%res
}

declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test8(<16 x float> %a) {
; CHECK-LABEL: test8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
  ret <16 x float>%res
}

define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rsqrt_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
  ret <8 x double> %1
}

define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_sqrt_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
  ret <8 x double> %3
}

define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) {
; CHECK-LABEL: test_maskz_sqrt_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
  ret <8 x double> %3
}
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)

define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_round_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
  ret <8 x double> %1
}

define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_sqrt_round_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
  ret <8 x double> %3
}

define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) {
; CHECK-LABEL: test_maskz_sqrt_round_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
  ret <8 x double> %3
}
declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, i32) nounwind readnone

define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
  ret <16 x float> %1
}

define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_sqrt_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsqrtps %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
  ret <16 x float> %3
}

define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)

define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_round_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
  ret <16 x float> %1
}

define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_sqrt_round_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
  ret <16 x float> %3
}

define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) {
; CHECK-LABEL: test_maskz_sqrt_round_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, i32) nounwind readnone

define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_getexp_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vgetexppd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_getexp_round_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vgetexppd {sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_getexp_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vgetexpps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_getexp_round_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vgetexpps {sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vaddps %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT:    vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
  %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)

  %res.1 = fadd <4 x float> %res0, %res1
  %res.2 = fadd <4 x float> %res2, %res3
  %res = fadd <4 x float> %res.1, %res.2
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovapd %xmm2, %xmm3
; CHECK-NEXT:    vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vaddpd %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT:    vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
  %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)

  %res.1 = fadd <2 x double> %res0, %res1
  %res.2 = fadd <2 x double> %res2, %res3
  %res = fadd <2 x double> %res.1, %res.2
  ret <2 x double> %res
}

define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtsd2si %xmm0, %rax
; CHECK-NEXT:    retq
  %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttsd2si %xmm0, %rcx
; CHECK-NEXT:    vcvttsd2si {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone

define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2usi:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttsd2usi %xmm0, %ecx
; CHECK-NEXT:    vcvttsd2usi {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
  %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone

define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2si:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttsd2si %xmm0, %ecx
; CHECK-NEXT:    vcvttsd2si {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
  %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone



define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttsd2usi %xmm0, %rcx
; CHECK-NEXT:    vcvttsd2usi {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone

define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
;
CHECK-LABEL: test_x86_sse_cvtss2si64: 700 ; CHECK: ## %bb.0: 701 ; CHECK-NEXT: vcvtss2si %xmm0, %rax 702 ; CHECK-NEXT: retq 703 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] 704 ret i64 %res 705 } 706 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 707 708 709 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { 710 ; CHECK-LABEL: test_x86_sse_cvtsi642ss: 711 ; CHECK: ## %bb.0: 712 ; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 713 ; CHECK-NEXT: retq 714 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] 715 ret <4 x float> %res 716 } 717 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone 718 719 720 define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) { 721 ; CHECK-LABEL: test_x86_avx512_cvttss2si: 722 ; CHECK: ## %bb.0: 723 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx 724 ; CHECK-NEXT: vcvttss2si %xmm0, %eax 725 ; CHECK-NEXT: addl %ecx, %eax 726 ; CHECK-NEXT: retq 727 %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ; 728 %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ; 729 %res2 = add i32 %res0, %res1 730 ret i32 %res2 731 } 732 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone 733 734 define i32 @test_x86_avx512_cvttss2si_load(<4 x float>* %a0) { 735 ; CHECK-LABEL: test_x86_avx512_cvttss2si_load: 736 ; CHECK: ## %bb.0: 737 ; CHECK-NEXT: vcvttss2si (%rdi), %eax 738 ; CHECK-NEXT: retq 739 %a1 = load <4 x float>, <4 x float>* %a0 740 %res = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a1, i32 4) ; 741 ret i32 %res 742 } 743 744 define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) { 745 ; CHECK-LABEL: test_x86_avx512_cvttss2si64: 746 ; CHECK: ## %bb.0: 747 ; CHECK-NEXT: vcvttss2si %xmm0, %rcx 748 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax 749 ; CHECK-NEXT: addq %rcx, %rax 750 ; CHECK-NEXT: retq 751 %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x 
float> %a0, i32 4) ; 752 %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ; 753 %res2 = add i64 %res0, %res1 754 ret i64 %res2 755 } 756 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone 757 758 define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) { 759 ; CHECK-LABEL: test_x86_avx512_cvttss2usi: 760 ; CHECK: ## %bb.0: 761 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx 762 ; CHECK-NEXT: vcvttss2usi %xmm0, %eax 763 ; CHECK-NEXT: addl %ecx, %eax 764 ; CHECK-NEXT: retq 765 %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ; 766 %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ; 767 %res2 = add i32 %res0, %res1 768 ret i32 %res2 769 } 770 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone 771 772 define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) { 773 ; CHECK-LABEL: test_x86_avx512_cvttss2usi64: 774 ; CHECK: ## %bb.0: 775 ; CHECK-NEXT: vcvttss2usi %xmm0, %rcx 776 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax 777 ; CHECK-NEXT: addq %rcx, %rax 778 ; CHECK-NEXT: retq 779 %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ; 780 %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ; 781 %res2 = add i64 %res0, %res1 782 ret i64 %res2 783 } 784 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone 785 786 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { 787 ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64: 788 ; CHECK: ## %bb.0: 789 ; CHECK-NEXT: vcvtsd2usi %xmm0, %rax 790 ; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rcx 791 ; CHECK-NEXT: addq %rax, %rcx 792 ; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rax 793 ; CHECK-NEXT: addq %rcx, %rax 794 ; CHECK-NEXT: retq 795 796 %res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4) 797 %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 3) 798 %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 1) 799 
%res3 = add i64 %res, %res1 800 %res4 = add i64 %res3, %res2 801 ret i64 %res4 802 } 803 declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone 804 805 define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) { 806 ; CHECK-LABEL: test_x86_avx512_cvtsd2si64: 807 ; CHECK: ## %bb.0: 808 ; CHECK-NEXT: vcvtsd2si %xmm0, %rax 809 ; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rcx 810 ; CHECK-NEXT: addq %rax, %rcx 811 ; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rax 812 ; CHECK-NEXT: addq %rcx, %rax 813 ; CHECK-NEXT: retq 814 815 %res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4) 816 %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 3) 817 %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 1) 818 %res3 = add i64 %res, %res1 819 %res4 = add i64 %res3, %res2 820 ret i64 %res4 821 } 822 declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone 823 824 define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) { 825 ; CHECK-LABEL: test_x86_avx512_cvtss2usi64: 826 ; CHECK: ## %bb.0: 827 ; CHECK-NEXT: vcvtss2usi %xmm0, %rax 828 ; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rcx 829 ; CHECK-NEXT: addq %rax, %rcx 830 ; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rax 831 ; CHECK-NEXT: addq %rcx, %rax 832 ; CHECK-NEXT: retq 833 834 %res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4) 835 %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 3) 836 %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 1) 837 %res3 = add i64 %res, %res1 838 %res4 = add i64 %res3, %res2 839 ret i64 %res4 840 } 841 declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone 842 843 define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) { 844 ; CHECK-LABEL: test_x86_avx512_cvtss2si64: 845 ; CHECK: ## %bb.0: 846 ; CHECK-NEXT: vcvtss2si %xmm0, %rax 847 ; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rcx 848 ; CHECK-NEXT: addq %rax, %rcx 849 ; CHECK-NEXT: vcvtss2si 
{rd-sae}, %xmm0, %rax 850 ; CHECK-NEXT: addq %rcx, %rax 851 ; CHECK-NEXT: retq 852 853 %res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4) 854 %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 3) 855 %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 1) 856 %res3 = add i64 %res, %res1 857 %res4 = add i64 %res3, %res2 858 ret i64 %res4 859 } 860 declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone 861 862 define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) { 863 ; CHECK-LABEL: test_x86_avx512_cvtsd2usi32: 864 ; CHECK: ## %bb.0: 865 ; CHECK-NEXT: vcvtsd2usi %xmm0, %eax 866 ; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %ecx 867 ; CHECK-NEXT: addl %eax, %ecx 868 ; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %eax 869 ; CHECK-NEXT: addl %ecx, %eax 870 ; CHECK-NEXT: retq 871 872 %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4) 873 %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 3) 874 %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 1) 875 %res3 = add i32 %res, %res1 876 %res4 = add i32 %res3, %res2 877 ret i32 %res4 878 } 879 declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone 880 881 define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) { 882 ; CHECK-LABEL: test_x86_avx512_cvtsd2si32: 883 ; CHECK: ## %bb.0: 884 ; CHECK-NEXT: vcvtsd2si %xmm0, %eax 885 ; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %ecx 886 ; CHECK-NEXT: addl %eax, %ecx 887 ; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %eax 888 ; CHECK-NEXT: addl %ecx, %eax 889 ; CHECK-NEXT: retq 890 891 %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4) 892 %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 3) 893 %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 1) 894 %res3 = add i32 %res, %res1 895 %res4 = add i32 %res3, %res2 896 ret i32 %res4 897 } 898 declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) 
nounwind readnone 899 900 define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) { 901 ; CHECK-LABEL: test_x86_avx512_cvtss2usi32: 902 ; CHECK: ## %bb.0: 903 ; CHECK-NEXT: vcvtss2usi %xmm0, %eax 904 ; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %ecx 905 ; CHECK-NEXT: addl %eax, %ecx 906 ; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %eax 907 ; CHECK-NEXT: addl %ecx, %eax 908 ; CHECK-NEXT: retq 909 910 %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4) 911 %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 3) 912 %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 1) 913 %res3 = add i32 %res, %res1 914 %res4 = add i32 %res3, %res2 915 ret i32 %res4 916 } 917 declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone 918 919 define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) { 920 ; CHECK-LABEL: test_x86_avx512_cvtss2si32: 921 ; CHECK: ## %bb.0: 922 ; CHECK-NEXT: vcvtss2si %xmm0, %eax 923 ; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %ecx 924 ; CHECK-NEXT: addl %eax, %ecx 925 ; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %eax 926 ; CHECK-NEXT: addl %ecx, %eax 927 ; CHECK-NEXT: retq 928 929 %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4) 930 %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 3) 931 %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 1) 932 %res3 = add i32 %res, %res1 933 %res4 = add i32 %res3, %res2 934 ret i32 %res4 935 } 936 declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone 937 938 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) { 939 ; CHECK-LABEL: test_x86_vcvtph2ps_512: 940 ; CHECK: ## %bb.0: 941 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 942 ; CHECK-NEXT: retq 943 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) 944 ret <16 x float> %res 945 } 946 947 define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) { 948 ; 
CHECK-LABEL: test_x86_vcvtph2ps_512_sae: 949 ; CHECK: ## %bb.0: 950 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 951 ; CHECK-NEXT: retq 952 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) 953 ret <16 x float> %res 954 } 955 956 define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) { 957 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk: 958 ; CHECK: ## %bb.0: 959 ; CHECK-NEXT: kmovw %edi, %k1 960 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1} 961 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 962 ; CHECK-NEXT: retq 963 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4) 964 ret <16 x float> %res 965 } 966 967 define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) { 968 ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz: 969 ; CHECK: ## %bb.0: 970 ; CHECK-NEXT: kmovw %edi, %k1 971 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} 972 ; CHECK-NEXT: retq 973 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8) 974 ret <16 x float> %res 975 } 976 977 define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) { 978 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz: 979 ; CHECK: ## %bb.0: 980 ; CHECK-NEXT: kmovw %edi, %k1 981 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z} 982 ; CHECK-NEXT: retq 983 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4) 984 ret <16 x float> %res 985 } 986 987 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly 988 989 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, <16 x i16> * %dst) { 990 ; CHECK-LABEL: test_x86_vcvtps2ph_256: 991 ; CHECK: ## %bb.0: 992 ; CHECK-NEXT: kmovw %edi, %k1 993 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, 
%ymm1 {%k1} 994 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm2 {%k1} {z} 995 ; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1 996 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, (%rsi) 997 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 998 ; CHECK-NEXT: retq 999 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1) 1000 %res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 %mask) 1001 %res3 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> %src, i16 %mask) 1002 store <16 x i16> %res1, <16 x i16> * %dst 1003 %res = add <16 x i16> %res2, %res3 1004 ret <16 x i16> %res 1005 } 1006 1007 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly 1008 1009 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) { 1010 ; CHECK-LABEL: test_cmpps: 1011 ; CHECK: ## %bb.0: 1012 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 1013 ; CHECK-NEXT: kmovw %k0, %eax 1014 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax 1015 ; CHECK-NEXT: vzeroupper 1016 ; CHECK-NEXT: retq 1017 %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8) 1018 %1 = bitcast <16 x i1> %res to i16 1019 ret i16 %1 1020 } 1021 declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) 1022 1023 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) { 1024 ; CHECK-LABEL: test_cmppd: 1025 ; CHECK: ## %bb.0: 1026 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0 1027 ; CHECK-NEXT: kmovw %k0, %eax 1028 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax 1029 ; CHECK-NEXT: vzeroupper 1030 ; CHECK-NEXT: retq 1031 %res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4) 1032 %1 = bitcast <8 x i1> %res to i8 1033 ret i8 %1 1034 } 1035 declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32) 1036 1037 ; 
Function Attrs: nounwind readnone 1038 1039 ; fp min - max 1040 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) { 1041 ; CHECK-LABEL: test_vmaxpd: 1042 ; CHECK: ## %bb.0: 1043 ; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 1044 ; CHECK-NEXT: retq 1045 %1 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4) 1046 ret <8 x double> %1 1047 } 1048 declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32) 1049 1050 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) { 1051 ; CHECK-LABEL: test_vminpd: 1052 ; CHECK: ## %bb.0: 1053 ; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0 1054 ; CHECK-NEXT: retq 1055 %1 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4) 1056 ret <8 x double> %1 1057 } 1058 declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32) 1059 1060 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) { 1061 ; CHECK-LABEL: test_mask_store_ss: 1062 ; CHECK: ## %bb.0: 1063 ; CHECK-NEXT: kmovw %esi, %k1 1064 ; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1} 1065 ; CHECK-NEXT: retq 1066 %1 = and i8 %mask, 1 1067 %2 = bitcast i8* %ptr to <4 x float>* 1068 %3 = bitcast i8 %1 to <8 x i1> 1069 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1070 call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %data, <4 x float>* %2, i32 1, <4 x i1> %extract) 1071 ret void 1072 } 1073 declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) #1 1074 1075 1076 declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32) 1077 declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32) 1078 declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32) 1079 1080 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) { 1081 ; CHECK-LABEL: test_vsubps_rn: 1082 ; CHECK: ## %bb.0: 
1083 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 1084 ; CHECK-NEXT: retq 1085 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) 1086 ret <16 x float> %1 1087 } 1088 1089 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) { 1090 ; CHECK-LABEL: test_vsubps_rd: 1091 ; CHECK: ## %bb.0: 1092 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 1093 ; CHECK-NEXT: retq 1094 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) 1095 ret <16 x float> %1 1096 } 1097 1098 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) { 1099 ; CHECK-LABEL: test_vsubps_ru: 1100 ; CHECK: ## %bb.0: 1101 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 1102 ; CHECK-NEXT: retq 1103 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2) 1104 ret <16 x float> %1 1105 } 1106 1107 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) { 1108 ; CHECK-LABEL: test_vsubps_rz: 1109 ; CHECK: ## %bb.0: 1110 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 1111 ; CHECK-NEXT: retq 1112 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3) 1113 ret <16 x float> %1 1114 } 1115 1116 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) { 1117 ; CHECK-LABEL: test_vmulps_rn: 1118 ; CHECK: ## %bb.0: 1119 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 1120 ; CHECK-NEXT: retq 1121 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) 1122 ret <16 x float> %1 1123 } 1124 1125 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) { 1126 ; CHECK-LABEL: test_vmulps_rd: 1127 ; CHECK: ## %bb.0: 1128 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 1129 ; CHECK-NEXT: retq 1130 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) 1131 ret <16 x float> %1 1132 } 1133 1134 
; NOTE(review): autogenerated CHECK lines (update_llc_test_checks.py) — only
; comments added here. Rounding immediates: 0 {rn-sae}, 1 {rd-sae},
; 2 {ru-sae}, 3 {rz-sae}, 4 current rounding. Zero-masking is expressed as
; select(mask, op, zeroinitializer); merge-masking as select(mask, op, src).
; The last function in this chunk continues past the end of the visible file.
define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_ru:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
  ret <16 x float> %1
}

define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
  ret <16 x float> %1
}

;; mask float
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_ru:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}

;; With Passthru value
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
  ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
  ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_ru:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
  ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
  ret <16 x float> %3
}

;; mask double
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
  ret <8 x double> %3
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 1)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
  ret <8 x double> %3
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_ru:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 2)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
  ret <8 x double> %3
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 3)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
  ret <8 x double> %3
}

; vaddps rounding variants: zero-masked, merge-masked, then unmasked.
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}

define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}

define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}

define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}

define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
  ret <16 x float> %1
}

define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
  ret <16 x float> %1
}

define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
  ret <16 x float> %1
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
  ret <16 x float> %1
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
  ret <16 x float> %1
}
declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)

; vsubps merge-masked rounding variants (group continues past this chunk).
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
1524 } 1525 1526 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1527 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_current: 1528 ; CHECK: ## %bb.0: 1529 ; CHECK-NEXT: kmovw %edi, %k1 1530 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} 1531 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1532 ; CHECK-NEXT: retq 1533 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1534 %2 = bitcast i16 %mask to <16 x i1> 1535 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src 1536 ret <16 x float> %3 1537 } 1538 1539 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1540 ; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae: 1541 ; CHECK: ## %bb.0: 1542 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 1543 ; CHECK-NEXT: retq 1544 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) 1545 ret <16 x float> %1 1546 } 1547 1548 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1549 ; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae: 1550 ; CHECK: ## %bb.0: 1551 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 1552 ; CHECK-NEXT: retq 1553 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) 1554 ret <16 x float> %1 1555 } 1556 1557 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1558 ; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae: 1559 ; CHECK: ## %bb.0: 1560 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 1561 ; CHECK-NEXT: retq 1562 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2) 1563 ret <16 x float> %1 1564 } 1565 1566 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1567 ; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae: 1568 ; CHECK: ## %bb.0: 1569 
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 1570 ; CHECK-NEXT: retq 1571 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3) 1572 ret <16 x float> %1 1573 } 1574 1575 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1576 ; CHECK-LABEL: test_mm512_sub_round_ps_current: 1577 ; CHECK: ## %bb.0: 1578 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 1579 ; CHECK-NEXT: retq 1580 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1581 ret <16 x float> %1 1582 } 1583 1584 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1585 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae: 1586 ; CHECK: ## %bb.0: 1587 ; CHECK-NEXT: kmovw %edi, %k1 1588 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1589 ; CHECK-NEXT: retq 1590 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) 1591 %2 = bitcast i16 %mask to <16 x i1> 1592 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 1593 ret <16 x float> %3 1594 } 1595 1596 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1597 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae: 1598 ; CHECK: ## %bb.0: 1599 ; CHECK-NEXT: kmovw %edi, %k1 1600 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1601 ; CHECK-NEXT: retq 1602 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) 1603 %2 = bitcast i16 %mask to <16 x i1> 1604 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 1605 ret <16 x float> %3 1606 } 1607 1608 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1609 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae: 1610 ; CHECK: ## %bb.0: 1611 ; CHECK-NEXT: kmovw %edi, %k1 1612 ; CHECK-NEXT: 
vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1613 ; CHECK-NEXT: retq 1614 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2) 1615 %2 = bitcast i16 %mask to <16 x i1> 1616 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 1617 ret <16 x float> %3 1618 } 1619 1620 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1621 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae: 1622 ; CHECK: ## %bb.0: 1623 ; CHECK-NEXT: kmovw %edi, %k1 1624 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1625 ; CHECK-NEXT: retq 1626 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3) 1627 %2 = bitcast i16 %mask to <16 x i1> 1628 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 1629 ret <16 x float> %3 1630 } 1631 1632 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1633 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_current: 1634 ; CHECK: ## %bb.0: 1635 ; CHECK-NEXT: kmovw %edi, %k1 1636 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} 1637 ; CHECK-NEXT: retq 1638 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1639 %2 = bitcast i16 %mask to <16 x i1> 1640 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 1641 ret <16 x float> %3 1642 } 1643 1644 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1645 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae: 1646 ; CHECK: ## %bb.0: 1647 ; CHECK-NEXT: kmovw %edi, %k1 1648 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} 1649 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1650 ; CHECK-NEXT: retq 1651 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) 1652 %2 = bitcast i16 %mask to <16 x i1> 1653 %3 = select <16 x 
i1> %2, <16 x float> %1, <16 x float> %src 1654 ret <16 x float> %3 1655 } 1656 1657 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1658 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae: 1659 ; CHECK: ## %bb.0: 1660 ; CHECK-NEXT: kmovw %edi, %k1 1661 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} 1662 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1663 ; CHECK-NEXT: retq 1664 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) 1665 %2 = bitcast i16 %mask to <16 x i1> 1666 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src 1667 ret <16 x float> %3 1668 } 1669 1670 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1671 ; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae: 1672 ; CHECK: ## %bb.0: 1673 ; CHECK-NEXT: kmovw %edi, %k1 1674 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 1675 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1676 ; CHECK-NEXT: retq 1677 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2) 1678 %2 = bitcast i16 %mask to <16 x i1> 1679 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src 1680 ret <16 x float> %3 1681 } 1682 1683 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1684 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae: 1685 ; CHECK: ## %bb.0: 1686 ; CHECK-NEXT: kmovw %edi, %k1 1687 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 1688 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1689 ; CHECK-NEXT: retq 1690 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3) 1691 %2 = bitcast i16 %mask to <16 x i1> 1692 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src 1693 ret <16 x float> %3 1694 } 1695 1696 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x 
float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1697 ; CHECK-LABEL: test_mm512_mask_div_round_ps_current: 1698 ; CHECK: ## %bb.0: 1699 ; CHECK-NEXT: kmovw %edi, %k1 1700 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} 1701 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1702 ; CHECK-NEXT: retq 1703 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1704 %2 = bitcast i16 %mask to <16 x i1> 1705 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src 1706 ret <16 x float> %3 1707 } 1708 1709 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1710 ; CHECK-LABEL: test_mm512_div_round_ps_rn_sae: 1711 ; CHECK: ## %bb.0: 1712 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 1713 ; CHECK-NEXT: retq 1714 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) 1715 ret <16 x float> %1 1716 } 1717 1718 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1719 ; CHECK-LABEL: test_mm512_div_round_ps_rd_sae: 1720 ; CHECK: ## %bb.0: 1721 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 1722 ; CHECK-NEXT: retq 1723 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) 1724 ret <16 x float> %1 1725 } 1726 1727 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1728 ; CHECK-LABEL: test_mm512_div_round_ps_ru_sae: 1729 ; CHECK: ## %bb.0: 1730 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 1731 ; CHECK-NEXT: retq 1732 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2) 1733 ret <16 x float> %1 1734 } 1735 1736 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1737 ; CHECK-LABEL: test_mm512_div_round_ps_rz_sae: 1738 ; CHECK: ## %bb.0: 1739 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 1740 ; CHECK-NEXT: retq 1741 
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3) 1742 ret <16 x float> %1 1743 } 1744 1745 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1746 ; CHECK-LABEL: test_mm512_div_round_ps_current: 1747 ; CHECK: ## %bb.0: 1748 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 1749 ; CHECK-NEXT: retq 1750 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1751 ret <16 x float> %1 1752 } 1753 declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32) 1754 1755 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1756 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae: 1757 ; CHECK: ## %bb.0: 1758 ; CHECK-NEXT: kmovw %edi, %k1 1759 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1760 ; CHECK-NEXT: retq 1761 %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) 1762 %2 = bitcast i16 %mask to <16 x i1> 1763 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 1764 ret <16 x float> %3 1765 } 1766 1767 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1768 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_current: 1769 ; CHECK: ## %bb.0: 1770 ; CHECK-NEXT: kmovw %edi, %k1 1771 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} 1772 ; CHECK-NEXT: retq 1773 %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1774 %2 = bitcast i16 %mask to <16 x i1> 1775 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 1776 ret <16 x float> %3 1777 } 1778 1779 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1780 ; CHECK-LABEL: test_mm512_mask_min_round_ps_sae: 1781 ; CHECK: ## %bb.0: 1782 ; CHECK-NEXT: kmovw %edi, %k1 1783 ; CHECK-NEXT: 
vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} 1784 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1785 ; CHECK-NEXT: retq 1786 %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) 1787 %2 = bitcast i16 %mask to <16 x i1> 1788 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src 1789 ret <16 x float> %3 1790 } 1791 1792 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1793 ; CHECK-LABEL: test_mm512_mask_min_round_ps_current: 1794 ; CHECK: ## %bb.0: 1795 ; CHECK-NEXT: kmovw %edi, %k1 1796 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} 1797 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1798 ; CHECK-NEXT: retq 1799 %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1800 %2 = bitcast i16 %mask to <16 x i1> 1801 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src 1802 ret <16 x float> %3 1803 } 1804 1805 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1806 ; CHECK-LABEL: test_mm512_min_round_ps_sae: 1807 ; CHECK: ## %bb.0: 1808 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 1809 ; CHECK-NEXT: retq 1810 %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) 1811 ret <16 x float> %1 1812 } 1813 1814 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1815 ; CHECK-LABEL: test_mm512_min_round_ps_current: 1816 ; CHECK: ## %bb.0: 1817 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 1818 ; CHECK-NEXT: retq 1819 %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1820 ret <16 x float> %1 1821 } 1822 declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32) 1823 1824 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1825 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae: 1826 ; CHECK: 
## %bb.0: 1827 ; CHECK-NEXT: kmovw %edi, %k1 1828 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1829 ; CHECK-NEXT: retq 1830 %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) 1831 %2 = bitcast i16 %mask to <16 x i1> 1832 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 1833 ret <16 x float> %3 1834 } 1835 1836 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1837 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_current: 1838 ; CHECK: ## %bb.0: 1839 ; CHECK-NEXT: kmovw %edi, %k1 1840 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} 1841 ; CHECK-NEXT: retq 1842 %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1843 %2 = bitcast i16 %mask to <16 x i1> 1844 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 1845 ret <16 x float> %3 1846 } 1847 1848 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1849 ; CHECK-LABEL: test_mm512_mask_max_round_ps_sae: 1850 ; CHECK: ## %bb.0: 1851 ; CHECK-NEXT: kmovw %edi, %k1 1852 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} 1853 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1854 ; CHECK-NEXT: retq 1855 %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) 1856 %2 = bitcast i16 %mask to <16 x i1> 1857 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src 1858 ret <16 x float> %3 1859 } 1860 1861 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 1862 ; CHECK-LABEL: test_mm512_mask_max_round_ps_current: 1863 ; CHECK: ## %bb.0: 1864 ; CHECK-NEXT: kmovw %edi, %k1 1865 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} 1866 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 1867 ; CHECK-NEXT: retq 1868 %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 
x float> %a1, i32 4) 1869 %2 = bitcast i16 %mask to <16 x i1> 1870 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src 1871 ret <16 x float> %3 1872 } 1873 1874 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1875 ; CHECK-LABEL: test_mm512_max_round_ps_sae: 1876 ; CHECK: ## %bb.0: 1877 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 1878 ; CHECK-NEXT: retq 1879 %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) 1880 ret <16 x float> %1 1881 } 1882 1883 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1884 ; CHECK-LABEL: test_mm512_max_round_ps_current: 1885 ; CHECK: ## %bb.0: 1886 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 1887 ; CHECK-NEXT: retq 1888 %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) 1889 ret <16 x float> %1 1890 } 1891 declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32) 1892 1893 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 1894 1895 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 1896 ; CHECK-LABEL: test_mask_add_ss_rn: 1897 ; CHECK: ## %bb.0: 1898 ; CHECK-NEXT: kmovw %edi, %k1 1899 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} 1900 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 1901 ; CHECK-NEXT: retq 1902 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0) 1903 ret <4 x float> %res 1904 } 1905 1906 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 1907 ; CHECK-LABEL: test_mask_add_ss_rd: 1908 ; CHECK: ## %bb.0: 1909 ; CHECK-NEXT: kmovw %edi, %k1 1910 ; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} 1911 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 1912 ; CHECK-NEXT: retq 
1913 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1) 1914 ret <4 x float> %res 1915 } 1916 1917 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 1918 ; CHECK-LABEL: test_mask_add_ss_ru: 1919 ; CHECK: ## %bb.0: 1920 ; CHECK-NEXT: kmovw %edi, %k1 1921 ; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} 1922 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 1923 ; CHECK-NEXT: retq 1924 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2) 1925 ret <4 x float> %res 1926 } 1927 1928 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 1929 ; CHECK-LABEL: test_mask_add_ss_rz: 1930 ; CHECK: ## %bb.0: 1931 ; CHECK-NEXT: kmovw %edi, %k1 1932 ; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 1933 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 1934 ; CHECK-NEXT: retq 1935 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3) 1936 ret <4 x float> %res 1937 } 1938 1939 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 1940 ; CHECK-LABEL: test_mask_add_ss_current: 1941 ; CHECK: ## %bb.0: 1942 ; CHECK-NEXT: kmovw %edi, %k1 1943 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1} 1944 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 1945 ; CHECK-NEXT: retq 1946 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 1947 ret <4 x float> %res 1948 } 1949 1950 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 1951 ; CHECK-LABEL: test_maskz_add_ss_rn: 1952 ; CHECK: ## %bb.0: 1953 ; CHECK-NEXT: kmovw %edi, %k1 1954 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 1955 ; CHECK-NEXT: retq 1956 %res = call <4 x float> 
@llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0) 1957 ret <4 x float> %res 1958 } 1959 1960 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) { 1961 ; CHECK-LABEL: test_add_ss_rn: 1962 ; CHECK: ## %bb.0: 1963 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 1964 ; CHECK-NEXT: retq 1965 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0) 1966 ret <4 x float> %res 1967 } 1968 1969 define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) { 1970 ; CHECK-LABEL: test_mask_add_ss_current_memfold: 1971 ; CHECK: ## %bb.0: 1972 ; CHECK-NEXT: kmovw %esi, %k1 1973 ; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm1 {%k1} 1974 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 1975 ; CHECK-NEXT: retq 1976 %a1.val = load float, float* %a1 1977 %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 1978 %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 1979 %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 1980 %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 1981 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4) 1982 ret <4 x float> %res 1983 } 1984 1985 define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, float* %a1, i8 %mask) { 1986 ; CHECK-LABEL: test_maskz_add_ss_current_memfold: 1987 ; CHECK: ## %bb.0: 1988 ; CHECK-NEXT: kmovw %esi, %k1 1989 ; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm0 {%k1} {z} 1990 ; CHECK-NEXT: retq 1991 %a1.val = load float, float* %a1 1992 %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 1993 %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 1994 %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 1995 %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 
1996 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4) 1997 ret <4 x float> %res 1998 } 1999 2000 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 2001 2002 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 2003 ; CHECK-LABEL: test_mask_add_sd_rn: 2004 ; CHECK: ## %bb.0: 2005 ; CHECK-NEXT: kmovw %edi, %k1 2006 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} 2007 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 2008 ; CHECK-NEXT: retq 2009 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0) 2010 ret <2 x double> %res 2011 } 2012 2013 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 2014 ; CHECK-LABEL: test_mask_add_sd_rd: 2015 ; CHECK: ## %bb.0: 2016 ; CHECK-NEXT: kmovw %edi, %k1 2017 ; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} 2018 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 2019 ; CHECK-NEXT: retq 2020 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1) 2021 ret <2 x double> %res 2022 } 2023 2024 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 2025 ; CHECK-LABEL: test_mask_add_sd_ru: 2026 ; CHECK: ## %bb.0: 2027 ; CHECK-NEXT: kmovw %edi, %k1 2028 ; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} 2029 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 2030 ; CHECK-NEXT: retq 2031 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2) 2032 ret <2 x double> %res 2033 } 2034 2035 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 2036 ; CHECK-LABEL: 
test_mask_add_sd_rz: 2037 ; CHECK: ## %bb.0: 2038 ; CHECK-NEXT: kmovw %edi, %k1 2039 ; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 2040 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 2041 ; CHECK-NEXT: retq 2042 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3) 2043 ret <2 x double> %res 2044 } 2045 2046 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 2047 ; CHECK-LABEL: test_mask_add_sd_current: 2048 ; CHECK: ## %bb.0: 2049 ; CHECK-NEXT: kmovw %edi, %k1 2050 ; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1} 2051 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 2052 ; CHECK-NEXT: retq 2053 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 2054 ret <2 x double> %res 2055 } 2056 2057 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 2058 ; CHECK-LABEL: test_maskz_add_sd_rn: 2059 ; CHECK: ## %bb.0: 2060 ; CHECK-NEXT: kmovw %edi, %k1 2061 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 2062 ; CHECK-NEXT: retq 2063 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0) 2064 ret <2 x double> %res 2065 } 2066 2067 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) { 2068 ; CHECK-LABEL: test_add_sd_rn: 2069 ; CHECK: ## %bb.0: 2070 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 2071 ; CHECK-NEXT: retq 2072 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0) 2073 ret <2 x double> %res 2074 } 2075 2076 define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, double* %a1, <2 x double> %a2, i8 %mask) { 2077 ; CHECK-LABEL: test_mask_add_sd_current_memfold: 2078 ; CHECK: ## %bb.0: 2079 ; CHECK-NEXT: kmovw %esi, %k1 
2080 ; CHECK-NEXT: vaddsd (%rdi), %xmm0, %xmm1 {%k1} 2081 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 2082 ; CHECK-NEXT: retq 2083 %a1.val = load double, double* %a1 2084 %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0 2085 %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1 2086 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4) 2087 ret <2 x double> %res 2088 } 2089 2090 define <2 x double> @test_maskz_add_sd_current_memfold(<2 x double> %a0, double* %a1, i8 %mask) { 2091 ; CHECK-LABEL: test_maskz_add_sd_current_memfold: 2092 ; CHECK: ## %bb.0: 2093 ; CHECK-NEXT: kmovw %esi, %k1 2094 ; CHECK-NEXT: vaddsd (%rdi), %xmm0, %xmm0 {%k1} {z} 2095 ; CHECK-NEXT: retq 2096 %a1.val = load double, double* %a1 2097 %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0 2098 %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1 2099 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4) 2100 ret <2 x double> %res 2101 } 2102 2103 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 2104 2105 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 2106 ; CHECK-LABEL: test_mask_max_ss_sae: 2107 ; CHECK: ## %bb.0: 2108 ; CHECK-NEXT: kmovw %edi, %k1 2109 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1} 2110 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 2111 ; CHECK-NEXT: retq 2112 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) 2113 ret <4 x float> %res 2114 } 2115 2116 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2117 ; CHECK-LABEL: test_maskz_max_ss_sae: 2118 ; CHECK: ## %bb.0: 2119 ; CHECK-NEXT: kmovw %edi, %k1 2120 ; CHECK-NEXT: vmaxss 
{sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 2121 ; CHECK-NEXT: retq 2122 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8) 2123 ret <4 x float> %res 2124 } 2125 2126 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) { 2127 ; CHECK-LABEL: test_max_ss_sae: 2128 ; CHECK: ## %bb.0: 2129 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 2130 ; CHECK-NEXT: retq 2131 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8) 2132 ret <4 x float> %res 2133 } 2134 2135 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 2136 ; CHECK-LABEL: test_mask_max_ss: 2137 ; CHECK: ## %bb.0: 2138 ; CHECK-NEXT: kmovw %edi, %k1 2139 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1} 2140 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 2141 ; CHECK-NEXT: retq 2142 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 2143 ret <4 x float> %res 2144 } 2145 2146 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2147 ; CHECK-LABEL: test_maskz_max_ss: 2148 ; CHECK: ## %bb.0: 2149 ; CHECK-NEXT: kmovw %edi, %k1 2150 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z} 2151 ; CHECK-NEXT: retq 2152 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4) 2153 ret <4 x float> %res 2154 } 2155 2156 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) { 2157 ; CHECK-LABEL: test_max_ss: 2158 ; CHECK: ## %bb.0: 2159 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 2160 ; CHECK-NEXT: retq 2161 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4) 2162 ret <4 x float> %res 2163 } 2164 2165 define <4 x float> 
@test_mask_max_ss_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) { 2166 ; CHECK-LABEL: test_mask_max_ss_memfold: 2167 ; CHECK: ## %bb.0: 2168 ; CHECK-NEXT: kmovw %esi, %k1 2169 ; CHECK-NEXT: vmaxss (%rdi), %xmm0, %xmm1 {%k1} 2170 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 2171 ; CHECK-NEXT: retq 2172 %a1.val = load float, float* %a1 2173 %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 2174 %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 2175 %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 2176 %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 2177 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4) 2178 ret <4 x float> %res 2179 } 2180 2181 define <4 x float> @test_maskz_max_ss_memfold(<4 x float> %a0, float* %a1, i8 %mask) { 2182 ; CHECK-LABEL: test_maskz_max_ss_memfold: 2183 ; CHECK: ## %bb.0: 2184 ; CHECK-NEXT: kmovw %esi, %k1 2185 ; CHECK-NEXT: vmaxss (%rdi), %xmm0, %xmm0 {%k1} {z} 2186 ; CHECK-NEXT: retq 2187 %a1.val = load float, float* %a1 2188 %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 2189 %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 2190 %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 2191 %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 2192 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4) 2193 ret <4 x float> %res 2194 } 2195 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 2196 2197 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 2198 ; CHECK-LABEL: test_mask_max_sd_sae: 2199 ; CHECK: ## %bb.0: 2200 ; CHECK-NEXT: kmovw %edi, %k1 2201 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1} 2202 ; CHECK-NEXT: 
vmovapd %xmm2, %xmm0 2203 ; CHECK-NEXT: retq 2204 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) 2205 ret <2 x double> %res 2206 } 2207 2208 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 2209 ; CHECK-LABEL: test_maskz_max_sd_sae: 2210 ; CHECK: ## %bb.0: 2211 ; CHECK-NEXT: kmovw %edi, %k1 2212 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 2213 ; CHECK-NEXT: retq 2214 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8) 2215 ret <2 x double> %res 2216 } 2217 2218 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) { 2219 ; CHECK-LABEL: test_max_sd_sae: 2220 ; CHECK: ## %bb.0: 2221 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 2222 ; CHECK-NEXT: retq 2223 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8) 2224 ret <2 x double> %res 2225 } 2226 2227 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 2228 ; CHECK-LABEL: test_mask_max_sd: 2229 ; CHECK: ## %bb.0: 2230 ; CHECK-NEXT: kmovw %edi, %k1 2231 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1} 2232 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 2233 ; CHECK-NEXT: retq 2234 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 2235 ret <2 x double> %res 2236 } 2237 2238 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 2239 ; CHECK-LABEL: test_maskz_max_sd: 2240 ; CHECK: ## %bb.0: 2241 ; CHECK-NEXT: kmovw %edi, %k1 2242 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} 2243 ; CHECK-NEXT: retq 2244 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, 
i32 4) 2245 ret <2 x double> %res 2246 } 2247 2248 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) { 2249 ; CHECK-LABEL: test_max_sd: 2250 ; CHECK: ## %bb.0: 2251 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 2252 ; CHECK-NEXT: retq 2253 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4) 2254 ret <2 x double> %res 2255 } 2256 2257 define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, double* %a1, <2 x double> %a2, i8 %mask) { 2258 ; CHECK-LABEL: test_mask_max_sd_memfold: 2259 ; CHECK: ## %bb.0: 2260 ; CHECK-NEXT: kmovw %esi, %k1 2261 ; CHECK-NEXT: vmaxsd (%rdi), %xmm0, %xmm1 {%k1} 2262 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 2263 ; CHECK-NEXT: retq 2264 %a1.val = load double, double* %a1 2265 %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0 2266 %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1 2267 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4) 2268 ret <2 x double> %res 2269 } 2270 2271 define <2 x double> @test_maskz_max_sd_memfold(<2 x double> %a0, double* %a1, i8 %mask) { 2272 ; CHECK-LABEL: test_maskz_max_sd_memfold: 2273 ; CHECK: ## %bb.0: 2274 ; CHECK-NEXT: kmovw %esi, %k1 2275 ; CHECK-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 {%k1} {z} 2276 ; CHECK-NEXT: retq 2277 %a1.val = load double, double* %a1 2278 %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0 2279 %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1 2280 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4) 2281 ret <2 x double> %res 2282 } 2283 2284 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) { 2285 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64: 2286 ; CHECK: ## %bb.0: 2287 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0 2288 ; 
CHECK-NEXT: retq 2289 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1] 2290 ret <2 x double> %res 2291 } 2292 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone 2293 2294 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) { 2295 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32: 2296 ; CHECK: ## %bb.0: 2297 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0 2298 ; CHECK-NEXT: retq 2299 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1] 2300 ret <4 x float> %res 2301 } 2302 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone 2303 2304 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) { 2305 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64: 2306 ; CHECK: ## %bb.0: 2307 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0 2308 ; CHECK-NEXT: retq 2309 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1] 2310 ret <4 x float> %res 2311 } 2312 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone 2313 2314 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b) 2315 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss: 2316 ; CHECK: ## %bb.0: 2317 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0 2318 ; CHECK-NEXT: retq 2319 { 2320 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] 2321 ret <4 x float> %res 2322 } 2323 2324 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr) 2325 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem: 2326 ; CHECK: ## %bb.0: 2327 ; CHECK-NEXT: movl (%rdi), %eax 2328 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0 2329 ; CHECK-NEXT: retq 2330 { 2331 %b = load i32, i32* %ptr 2332 %res = call <4 x 
float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] 2333 ret <4 x float> %res 2334 } 2335 2336 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) 2337 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss: 2338 ; CHECK: ## %bb.0: 2339 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 2340 ; CHECK-NEXT: retq 2341 { 2342 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] 2343 ret <4 x float> %res 2344 } 2345 2346 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr) 2347 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem: 2348 ; CHECK: ## %bb.0: 2349 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0 2350 ; CHECK-NEXT: retq 2351 { 2352 %b = load i32, i32* %ptr 2353 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] 2354 ret <4 x float> %res 2355 } 2356 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone 2357 2358 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b) 2359 ; CHECK-LABEL: _mm_cvt_roundu64_ss: 2360 ; CHECK: ## %bb.0: 2361 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0 2362 ; CHECK-NEXT: retq 2363 { 2364 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1] 2365 ret <4 x float> %res 2366 } 2367 2368 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b) 2369 ; CHECK-LABEL: _mm_cvtu64_ss: 2370 ; CHECK: ## %bb.0: 2371 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0 2372 ; CHECK-NEXT: retq 2373 { 2374 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1] 2375 ret <4 x float> %res 2376 } 2377 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone 2378 2379
; NOTE(review): the names of the next two tests appear swapped relative to the
; ss variants above: @test_x86_avx512_mm_cvtu64_sd passes rounding-mode operand
; i32 1 ({rd-sae}) while @test_x86_avx512__mm_cvt_roundu64_sd passes i32 4
; (current rounding). Compare @_mm_cvt_roundu64_ss (i32 1, {rd-sae}) with
; @_mm_cvtu64_ss (i32 4) directly above. Confirm intent; if renaming, update the
; CHECK-LABEL lines too and regenerate with utils/update_llc_test_checks.py
; rather than editing assertions by hand.
define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b) 2380 ; CHECK-LABEL:
test_x86_avx512_mm_cvtu64_sd: 2381 ; CHECK: ## %bb.0: 2382 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0 2383 ; CHECK-NEXT: retq 2384 { 2385 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1] 2386 ret <2 x double> %res 2387 } 2388 2389 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b) 2390 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd: 2391 ; CHECK: ## %bb.0: 2392 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 2393 ; CHECK-NEXT: retq 2394 { 2395 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1] 2396 ret <2 x double> %res 2397 } 2398 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone 2399 2400 declare <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>) 2401 2402 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { 2403 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512: 2404 ; CHECK: ## %bb.0: 2405 ; CHECK-NEXT: kmovw %esi, %k1 2406 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3 2407 ; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1} 2408 ; CHECK-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 2409 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 2410 ; CHECK-NEXT: retq 2411 %x2 = load <16 x i32>, <16 x i32>* %x2p 2412 %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) 2413 %2 = bitcast i16 %x3 to <16 x i1> 2414 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1 2415 %4 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4) 2416 %res2 = add <16 x i32> %3, %4 2417 ret <16 x i32> %res2 2418 } 2419 2420 declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>) 2421 2422 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x
double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { 2423 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512: 2424 ; CHECK: ## %bb.0: 2425 ; CHECK-NEXT: vmovapd %zmm0, %zmm3 2426 ; CHECK-NEXT: vpermt2pd %zmm2, %zmm1, %zmm3 2427 ; CHECK-NEXT: kmovw %edi, %k1 2428 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} 2429 ; CHECK-NEXT: vaddpd %zmm3, %zmm1, %zmm0 2430 ; CHECK-NEXT: retq 2431 %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) 2432 %2 = bitcast <8 x i64> %x1 to <8 x double> 2433 %3 = bitcast i8 %x3 to <8 x i1> 2434 %4 = select <8 x i1> %3, <8 x double> %1, <8 x double> %2 2435 %5 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) 2436 %6 = bitcast <8 x i64> %x1 to <8 x double> 2437 %res2 = fadd <8 x double> %4, %5 2438 ret <8 x double> %res2 2439 } 2440 2441 declare <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>) 2442 2443 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { 2444 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512: 2445 ; CHECK: ## %bb.0: 2446 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 2447 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm1, %zmm3 2448 ; CHECK-NEXT: kmovw %edi, %k1 2449 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} 2450 ; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0 2451 ; CHECK-NEXT: retq 2452 %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) 2453 %2 = bitcast <16 x i32> %x1 to <16 x float> 2454 %3 = bitcast i16 %x3 to <16 x i1> 2455 %4 = select <16 x i1> %3, <16 x float> %1, <16 x float> %2 2456 %5 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) 2457 %6 = bitcast <16 x i32> %x1 to <16 x float> 2458 %res2 = fadd <16 x float> %4, %5 2459 ret <16 x float> %res2 2460 } 2461 2462 declare <8 x 
i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>) 2463 2464 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 2465 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512: 2466 ; CHECK: ## %bb.0: 2467 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 2468 ; CHECK-NEXT: vpermt2q %zmm2, %zmm1, %zmm3 2469 ; CHECK-NEXT: kmovw %edi, %k1 2470 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1} 2471 ; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0 2472 ; CHECK-NEXT: retq 2473 %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) 2474 %2 = bitcast i8 %x3 to <8 x i1> 2475 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1 2476 %4 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) 2477 %res2 = add <8 x i64> %3, %4 2478 ret <8 x i64> %res2 2479 } 2480 2481 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) { 2482 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512: 2483 ; CHECK: ## %bb.0: 2484 ; CHECK-NEXT: kmovw %esi, %k1 2485 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm2 2486 ; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z} 2487 ; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1 2488 ; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0 2489 ; CHECK-NEXT: retq 2490 %x2 = load <16 x i32>, <16 x i32>* %x2p 2491 %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2) 2492 %2 = bitcast i16 %x3 to <16 x i1> 2493 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer 2494 %4 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x1) 2495 %res2 = add <16 x i32> %3, %4 2496 ret <16 x i32> %res2 2497 } 2498 2499 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) { 2500 ; CHECK-LABEL: 
test_int_x86_avx512_maskz_vpermt2var_pd_512: 2501 ; CHECK: ## %bb.0: 2502 ; CHECK-NEXT: kmovw %esi, %k1 2503 ; CHECK-NEXT: vmovapd %zmm1, %zmm2 2504 ; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z} 2505 ; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1 2506 ; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0 2507 ; CHECK-NEXT: retq 2508 %x2s = load double, double* %x2ptr 2509 %x2ins = insertelement <8 x double> undef, double %x2s, i32 0 2510 %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer 2511 %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x1, <8 x i64> %x0, <8 x double> %x2) 2512 %2 = bitcast i8 %x3 to <8 x i1> 2513 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer 2514 %4 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x1, <8 x i64> %x0, <8 x double> %x1) 2515 %res2 = fadd <8 x double> %3, %4 2516 ret <8 x double> %res2 2517 } 2518 2519 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 2520 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512: 2521 ; CHECK: ## %bb.0: 2522 ; CHECK-NEXT: vmovaps %zmm1, %zmm3 2523 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 2524 ; CHECK-NEXT: kmovw %edi, %k1 2525 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z} 2526 ; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0 2527 ; CHECK-NEXT: retq 2528 %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0, <16 x float> %x2) 2529 %2 = bitcast i16 %x3 to <16 x i1> 2530 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer 2531 %4 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0, <16 x float> %x2) 2532 %res2 = fadd <16 x float> %3, %4 2533 ret <16 x float> %res2 2534 } 2535 2536 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 2537 ; CHECK-LABEL: 
test_int_x86_avx512_maskz_vpermt2var_q_512: 2538 ; CHECK: ## %bb.0: 2539 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3 2540 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 2541 ; CHECK-NEXT: kmovw %edi, %k1 2542 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z} 2543 ; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0 2544 ; CHECK-NEXT: retq 2545 %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2) 2546 %2 = bitcast i8 %x3 to <8 x i1> 2547 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer 2548 %4 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2) 2549 %res2 = add <8 x i64> %3, %4 2550 ret <8 x i64> %res2 2551 } 2552 2553 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 2554 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512: 2555 ; CHECK: ## %bb.0: 2556 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3 2557 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 2558 ; CHECK-NEXT: kmovw %edi, %k1 2559 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1} 2560 ; CHECK-NEXT: vpaddd %zmm3, %zmm1, %zmm0 2561 ; CHECK-NEXT: retq 2562 %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2) 2563 %2 = bitcast i16 %x3 to <16 x i1> 2564 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1 2565 %4 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2) 2566 %res2 = add <16 x i32> %3, %4 2567 ret <16 x i32> %res2 2568 } 2569 2570 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) 2571 define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { 2572 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_512: 2573 ; CHECK: ## %bb.0: 2574 ; CHECK-NEXT: kmovw %edi, %k1 2575 ; CHECK-NEXT: vscalefpd {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2576 ; CHECK-NEXT: 
vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0 2577 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 2578 ; CHECK-NEXT: retq 2579 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3) 2580 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) 2581 %res2 = fadd <8 x double> %res, %res1 2582 ret <8 x double> %res2 2583 } 2584 2585 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 2586 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 2587 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_512: 2588 ; CHECK: ## %bb.0: 2589 ; CHECK-NEXT: kmovw %edi, %k1 2590 ; CHECK-NEXT: vscalefps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2591 ; CHECK-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0 2592 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 2593 ; CHECK-NEXT: retq 2594 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2) 2595 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 2596 %res2 = fadd <16 x float> %res, %res1 2597 ret <16 x float> %res2 2598 } 2599 2600 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8) 2601 2602 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 2603 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512: 2604 ; CHECK: ## %bb.0: 2605 ; CHECK-NEXT: kmovw %edi, %k1 2606 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z} 2607 ; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1} 2608 ; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 2609 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0 2610 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 2611 ; CHECK-NEXT: vzeroupper 2612 ; CHECK-NEXT: retq 2613 %res0 = call <16 x i8> 
@llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 2614 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 2615 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 2616 %res3 = add <16 x i8> %res0, %res1 2617 %res4 = add <16 x i8> %res3, %res2 2618 ret <16 x i8> %res4 2619 } 2620 2621 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8) 2622 2623 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 2624 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512: 2625 ; CHECK: ## %bb.0: 2626 ; CHECK-NEXT: kmovw %esi, %k1 2627 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi) 2628 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi) {%k1} 2629 ; CHECK-NEXT: vzeroupper 2630 ; CHECK-NEXT: retq 2631 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 2632 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 2633 ret void 2634 } 2635 2636 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8) 2637 2638 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 2639 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512: 2640 ; CHECK: ## %bb.0: 2641 ; CHECK-NEXT: kmovw %edi, %k1 2642 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z} 2643 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1} 2644 ; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 2645 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 2646 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 2647 ; CHECK-NEXT: vzeroupper 2648 ; CHECK-NEXT: retq 2649 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 2650 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 2651 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 2652 %res3 = add <16 x i8> %res0, %res1 2653 %res4 = 
add <16 x i8> %res3, %res2 2654 ret <16 x i8> %res4 2655 } 2656 2657 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8) 2658 2659 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 2660 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512: 2661 ; CHECK: ## %bb.0: 2662 ; CHECK-NEXT: kmovw %esi, %k1 2663 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) 2664 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1} 2665 ; CHECK-NEXT: vzeroupper 2666 ; CHECK-NEXT: retq 2667 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 2668 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 2669 ret void 2670 } 2671 2672 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8) 2673 2674 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 2675 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512: 2676 ; CHECK: ## %bb.0: 2677 ; CHECK-NEXT: kmovw %edi, %k1 2678 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z} 2679 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1} 2680 ; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 2681 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 2682 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 2683 ; CHECK-NEXT: vzeroupper 2684 ; CHECK-NEXT: retq 2685 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 2686 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 2687 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 2688 %res3 = add <16 x i8> %res0, %res1 2689 %res4 = add <16 x i8> %res3, %res2 2690 ret <16 x i8> %res4 2691 } 2692 2693 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8) 2694 2695 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 2696 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512: 2697 ; 
CHECK: ## %bb.0: 2698 ; CHECK-NEXT: kmovw %esi, %k1 2699 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) 2700 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1} 2701 ; CHECK-NEXT: vzeroupper 2702 ; CHECK-NEXT: retq 2703 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 2704 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 2705 ret void 2706 } 2707 2708 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8) 2709 2710 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 2711 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512: 2712 ; CHECK: ## %bb.0: 2713 ; CHECK-NEXT: kmovw %edi, %k1 2714 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z} 2715 ; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1} 2716 ; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 2717 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0 2718 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2719 ; CHECK-NEXT: vzeroupper 2720 ; CHECK-NEXT: retq 2721 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 2722 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 2723 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 2724 %res3 = add <8 x i16> %res0, %res1 2725 %res4 = add <8 x i16> %res3, %res2 2726 ret <8 x i16> %res4 2727 } 2728 2729 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8) 2730 2731 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 2732 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512: 2733 ; CHECK: ## %bb.0: 2734 ; CHECK-NEXT: kmovw %esi, %k1 2735 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi) 2736 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi) {%k1} 2737 ; CHECK-NEXT: vzeroupper 2738 ; CHECK-NEXT: retq 2739 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 2740 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* 
%ptr, <8 x i64> %x1, i8 %x2) 2741 ret void 2742 } 2743 2744 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8) 2745 2746 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 2747 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512: 2748 ; CHECK: ## %bb.0: 2749 ; CHECK-NEXT: kmovw %edi, %k1 2750 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z} 2751 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1} 2752 ; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 2753 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 2754 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2755 ; CHECK-NEXT: vzeroupper 2756 ; CHECK-NEXT: retq 2757 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 2758 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 2759 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 2760 %res3 = add <8 x i16> %res0, %res1 2761 %res4 = add <8 x i16> %res3, %res2 2762 ret <8 x i16> %res4 2763 } 2764 2765 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8) 2766 2767 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 2768 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512: 2769 ; CHECK: ## %bb.0: 2770 ; CHECK-NEXT: kmovw %esi, %k1 2771 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) 2772 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1} 2773 ; CHECK-NEXT: vzeroupper 2774 ; CHECK-NEXT: retq 2775 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 2776 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 2777 ret void 2778 } 2779 2780 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8) 2781 2782 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 2783 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512: 2784 ; CHECK: ## %bb.0: 
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovusqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpmovusqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpmovusqw %zmm0, %xmm0
; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)

; vpmovusqw truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovusqw %zmm0, (%rdi)
; CHECK-NEXT:    vpmovusqw %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)

; vpmovqd (plain i64->i32 truncate): unmasked/merge-masked/zero-masked forms, summed.
define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovqd %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpmovqd %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    vpmovqd %zmm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res3 = add <8 x i32> %res0, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)

; vpmovqd truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovqd %zmm0, (%rdi)
; CHECK-NEXT:    vpmovqd %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)

; vpmovsqd (signed-saturating i64->i32 truncate): unmasked/merge-masked/zero-masked forms.
define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovsqd %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpmovsqd %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    vpmovsqd %zmm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res3 = add <8 x i32> %res0, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)

; vpmovsqd truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovsqd %zmm0, (%rdi)
; CHECK-NEXT:    vpmovsqd %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)

; vpmovusqd (unsigned-saturating i64->i32 truncate): unmasked/merge-masked/zero-masked forms.
define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovusqd %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpmovusqd %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    vpmovusqd %zmm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res3 = add <8 x i32> %res0, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)

; vpmovusqd truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovusqd %zmm0, (%rdi)
; CHECK-NEXT:    vpmovusqd %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)

; vpmovdb (plain i32->i8 truncate): unmasked/merge-masked/zero-masked forms, summed.
define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpmovdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)

; vpmovdb truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovdb %zmm0, (%rdi)
; CHECK-NEXT:    vpmovdb %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)

; vpmovsdb (signed-saturating i32->i8 truncate): unmasked/merge-masked/zero-masked forms.
define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovsdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpmovsdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpmovsdb %zmm0, %xmm0
; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)

; vpmovsdb truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovsdb %zmm0, (%rdi)
; CHECK-NEXT:    vpmovsdb %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)

; vpmovusdb (unsigned-saturating i32->i8 truncate): unmasked/merge-masked/zero-masked forms.
define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovusdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpmovusdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpmovusdb %zmm0, %xmm0
; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)

; vpmovusdb truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovusdb %zmm0, (%rdi)
; CHECK-NEXT:    vpmovusdb %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)

; vpmovdw (plain i32->i16 truncate): unmasked/merge-masked/zero-masked forms, summed.
define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpmovdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
  %res3 = add <16 x i16> %res0, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)

; vpmovdw truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovdw %zmm0, (%rdi)
; CHECK-NEXT:    vpmovdw %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)

; vpmovsdw (signed-saturating i32->i16 truncate): unmasked/merge-masked/zero-masked forms.
define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovsdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpmovsdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    vpmovsdw %zmm0, %ymm0
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
  %res3 = add <16 x i16> %res0, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)

; vpmovsdw truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovsdw %zmm0, (%rdi)
; CHECK-NEXT:    vpmovsdw %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)

; vpmovusdw (unsigned-saturating i32->i16 truncate): unmasked/merge-masked/zero-masked forms.
define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmovusdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpmovusdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    vpmovusdw %zmm0, %ymm0
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
  %res3 = add <16 x i16> %res0, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)

; vpmovusdw truncating store: unmasked store followed by a masked store.
define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmovusdw %zmm0, (%rdi)
; CHECK-NEXT:    vpmovusdw %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)

; vcvtdq2ps: masked call with default rounding (i32 4) plus unmasked call that
; selects {rn-sae} embedded rounding (i32 0), per the CHECK lines.
define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtdq2ps {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)

; vcvtpd2dq: masked default-rounding call plus unmasked {rn-sae} call.
define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtpd2dq %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtpd2dq {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)

; vcvtpd2ps: masked default-rounding call plus unmasked {ru-sae} (i32 2) call.
define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtpd2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtpd2ps {ru-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

; vcvtpd2udq: masked {ru-sae} call plus unmasked {rn-sae} call.
define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtpd2udq {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)

; vcvtps2dq: masked {ru-sae} call plus unmasked {rn-sae} call.
define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2dq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)

; vcvtps2pd: masked default call plus unmasked {sae} (i32 8) call.
define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtps2pd %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2pd {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)

; vcvtps2udq: masked {ru-sae} call plus unmasked {rn-sae} call.
define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2udq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)

; vcvttpd2dq (truncating convert): masked default call plus unmasked {sae} call.
define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvttpd2dq {sae}, %zmm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)

; vcvtudq2ps: masked default call plus unmasked {rn-sae} call.
define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtudq2ps {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

; vcvttpd2udq: masked default call plus unmasked {sae} call.
define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvttpd2udq {sae}, %zmm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)

; vcvttps2dq: masked default call plus unmasked {sae} call.
define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2dq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)

; vcvttps2udq: masked default call plus unmasked {sae} call.
define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2udq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

; vgetexpss: merge-masked (default and {sae}), zero-masked {sae}, and unmasked {sae} forms.
define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_getexp_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT:    vgetexpss {sae}, %xmm1, %xmm0, %xmm5
; CHECK-NEXT:    vaddps %xmm5, %xmm4, %xmm4
; CHECK-NEXT:    vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vaddps %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    vaddps %xmm4, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
  %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)

  %res.1 = fadd <4 x float> %res0, %res1
  %res.2 = fadd <4 x float> %res2, %res3
  %res = fadd <4 x float> %res.1, %res.2
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

; vgetexpsd: same four-form coverage as the ss variant (unmasked res3 uses default rounding).
define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_getexp_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vgetexpsd %xmm1, %xmm0, %xmm3
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovapd %xmm2, %xmm4
; CHECK-NEXT:    vgetexpsd %xmm1, %xmm0, %xmm4 {%k1}
; CHECK-NEXT:    vgetexpsd {sae}, %xmm1, %xmm0, %xmm5 {%k1} {z}
; CHECK-NEXT:    vaddpd %xmm3, %xmm5, %xmm3
; CHECK-NEXT:    vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vaddpd %xmm2, %xmm4, %xmm0
; CHECK-NEXT:    vaddpd %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
  %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)

  %res.1 = fadd <2 x double> %res0, %res1
  %res.2 = fadd <2 x double> %res2, %res3
  %res = fadd <2 x double> %res.1, %res.2
  ret <2 x double> %res
}

declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)

; Masked scalar compare with {sae}: predicate 5 (nlt) lowers to vcmpnltsd.
define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq

  %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
  ret i8 %res4
}

; Four vcmp*sd variants (le/unord/neq/nlt, with and without mask / {sae}) OR'ed together.
define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcmplesd %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    vcmpunordsd {sae}, %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %edx
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcmpneqsd %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %esi
; CHECK-NEXT:    vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    orb %sil, %al
; CHECK-NEXT:    orb %dl, %al
; CHECK-NEXT:    orb %cl, %al
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq

  %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)

  %res11 = or i8 %res1, %res2
  %res12 = or i8 %res3, %res4
  %res13 = or i8 %res11, %res12
  ret i8 %res13
}

declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)

; Masked scalar compare: predicate 3 (unord) lowers to vcmpunordss.
define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcmpunordss %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq

  %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
  ret i8 %res2
}


; Four vcmp*ss variants AND'ed together (mirror of the sd "all" test, with andb).
define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcmpless %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    vcmpunordss {sae}, %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %edx
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcmpneqss %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %esi
; CHECK-NEXT:    vcmpnltss {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    andb %sil, %al
; CHECK-NEXT:    andb %dl, %al
; CHECK-NEXT:    andb %cl, %al
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)

  %res11 = and i8 %res1, %res2
  %res12 = and i8 %res3, %res4
  %res13 = and i8 %res11, %res12
  ret i8 %res13
}

declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

; vgetmantpd with immediate $11: masked default-rounding call plus unmasked {sae} call.
define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vgetmantpd $11, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vgetmantpd $11, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

; vgetmantps with immediate $11: masked default-rounding call plus unmasked {sae} call.
define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vgetmantps $11, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vgetmantps $11, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)

; vgetmantsd: merge-masked, zero-masked, merge-masked {sae} and unmasked forms, summed.
define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vgetmantsd $11, %xmm1, %xmm0, %xmm3
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovapd %xmm2, %xmm4
; CHECK-NEXT:    vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1}
; CHECK-NEXT:    vgetmantsd $11, %xmm1, %xmm0, %xmm5 {%k1} {z}
; CHECK-NEXT:    vaddpd %xmm5, %xmm4, %xmm4
; CHECK-NEXT:    vgetmantsd $11, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vaddpd %xmm3, %xmm2, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm4, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
  %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
  %res11 = fadd <2 x double> %res, %res1
  %res12 = fadd <2 x double> %res2, %res3
  %res13 = fadd <2 x double> %res11, %res12
  ret <2 x double> %res13
}

declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)

; vgetmantss: merge-masked, zero-masked, unmasked {sae} and unmasked default forms, summed.
define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vgetmantss $11, %xmm1, %xmm0, %xmm3
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vgetmantss $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT:    vaddps %xmm4, %xmm2, %xmm2
; CHECK-NEXT:    vgetmantss $11, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
  %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
  %res11 = fadd <4 x float> %res, %res1
  %res12 = fadd <4 x float> %res2, %res3
  %res13 = fadd <4 x float> %res11, %res12
  ret <4 x float> %res13
}

declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)

define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64>
%x1) { 3550 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512: 3551 ; CHECK: ## %bb.0: 3552 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 3553 ; CHECK-NEXT: retq 3554 %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1) 3555 ret <8 x double> %res 3556 } 3557 3558 define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %mask) { 3559 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512_mask: 3560 ; CHECK: ## %bb.0: 3561 ; CHECK-NEXT: kmovw %edi, %k1 3562 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} 3563 ; CHECK-NEXT: vmovapd %zmm2, %zmm0 3564 ; CHECK-NEXT: retq 3565 %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1) 3566 %mask.cast = bitcast i8 %mask to <8 x i1> 3567 %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> %x2 3568 ret <8 x double> %res2 3569 } 3570 3571 define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0, <8 x i64> %x1, i8 %mask) { 3572 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512_maskz: 3573 ; CHECK: ## %bb.0: 3574 ; CHECK-NEXT: kmovw %edi, %k1 3575 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} 3576 ; CHECK-NEXT: retq 3577 %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1) 3578 %mask.cast = bitcast i8 %mask to <8 x i1> 3579 %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> zeroinitializer 3580 ret <8 x double> %res2 3581 } 3582 3583 declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>) 3584 3585 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1) { 3586 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512: 3587 ; CHECK: ## %bb.0: 3588 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0 3589 ; CHECK-NEXT: retq 3590 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1) 3591 ret <16 x float> 
%res 3592 } 3593 3594 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) { 3595 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_mask: 3596 ; CHECK: ## %bb.0: 3597 ; CHECK-NEXT: kmovw %edi, %k1 3598 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} 3599 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3600 ; CHECK-NEXT: retq 3601 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1) 3602 %mask.cast = bitcast i16 %mask to <16 x i1> 3603 %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2 3604 ret <16 x float> %res2 3605 } 3606 3607 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) { 3608 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_maskz: 3609 ; CHECK: ## %bb.0: 3610 ; CHECK-NEXT: kmovw %edi, %k1 3611 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} 3612 ; CHECK-NEXT: retq 3613 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1) 3614 %mask.cast = bitcast i16 %mask to <16 x i1> 3615 %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer 3616 ret <16 x float> %res2 3617 } 3618 3619 ; Test case to make sure we can print shuffle decode comments for constant pool loads. 
3620 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1) { 3621 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool: 3622 ; CHECK: ## %bb.0: 3623 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12] 3624 ; CHECK-NEXT: retq 3625 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>) 3626 ret <16 x float> %res 3627 } 3628 3629 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) { 3630 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask: 3631 ; CHECK: ## %bb.0: 3632 ; CHECK-NEXT: kmovw %edi, %k1 3633 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12] 3634 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 3635 ; CHECK-NEXT: retq 3636 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>) 3637 %mask.cast = bitcast i16 %mask to <16 x i1> 3638 %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2 3639 ret <16 x float> %res2 3640 } 3641 3642 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) { 3643 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz: 3644 ; CHECK: ## %bb.0: 3645 ; CHECK-NEXT: kmovw %edi, %k1 3646 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12] 3647 ; CHECK-NEXT: retq 3648 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>) 3649 
%mask.cast = bitcast i16 %mask to <16 x i1> 3650 %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer 3651 ret <16 x float> %res2 3652 } 3653 3654 declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x float>, <2 x double>, i8, i32) 3655 3656 define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) { 3657 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round: 3658 ; CHECK: ## %bb.0: 3659 ; CHECK-NEXT: kmovw %edi, %k1 3660 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1} 3661 ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0 3662 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 3663 ; CHECK-NEXT: retq 3664 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4) 3665 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8) 3666 %res2 = fadd <2 x double> %res, %res1 3667 ret <2 x double> %res2 3668 } 3669 3670 declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x double>, <4 x float>, i8, i32) 3671 3672 define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) { 3673 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round: 3674 ; CHECK: ## %bb.0: 3675 ; CHECK-NEXT: kmovw %edi, %k1 3676 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3677 ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0 3678 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 3679 ; CHECK-NEXT: retq 3680 %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3) 3681 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8) 3682 %res2 = fadd <4 x float> %res, %res1 3683 ret <4 x float> %res2 3684 } 3685 3686 declare <16 x 
i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32) 3687 3688 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { 3689 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512: 3690 ; CHECK: ## %bb.0: 3691 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 3692 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 3693 ; CHECK-NEXT: kmovw %edi, %k1 3694 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} 3695 ; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 3696 ; CHECK-NEXT: retq 3697 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33) 3698 %2 = bitcast i16 %x4 to <16 x i1> 3699 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 3700 %4 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33) 3701 %res2 = add <16 x i32> %3, %4 3702 ret <16 x i32> %res2 3703 } 3704 3705 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { 3706 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512: 3707 ; CHECK: ## %bb.0: 3708 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 3709 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 3710 ; CHECK-NEXT: kmovw %edi, %k1 3711 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} 3712 ; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 3713 ; CHECK-NEXT: retq 3714 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33) 3715 %2 = bitcast i16 %x4 to <16 x i1> 3716 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer 3717 %4 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33) 3718 %res2 = add <16 x i32> %3, %4 3719 ret <16 x i32> %res2 3720 } 3721 3722 declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32) 3723 3724 define <8 x 
i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { 3725 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512: 3726 ; CHECK: ## %bb.0: 3727 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 3728 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 3729 ; CHECK-NEXT: kmovw %edi, %k1 3730 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} 3731 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 3732 ; CHECK-NEXT: retq 3733 %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33) 3734 %2 = bitcast i8 %x4 to <8 x i1> 3735 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0 3736 %4 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33) 3737 %res2 = add <8 x i64> %3, %4 3738 ret <8 x i64> %res2 3739 } 3740 3741 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { 3742 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512: 3743 ; CHECK: ## %bb.0: 3744 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 3745 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 3746 ; CHECK-NEXT: kmovw %edi, %k1 3747 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} 3748 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 3749 ; CHECK-NEXT: retq 3750 %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33) 3751 %2 = bitcast i8 %x4 to <8 x i1> 3752 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer 3753 %4 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33) 3754 %res2 = add <8 x i64> %3, %4 3755 ret <8 x i64> %res2 3756 } 3757 3758 define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { 3759 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae: 3760 ; CHECK: ## %bb.0: 3761 ; CHECK-NEXT: vcmpeqsd {sae}, %xmm1, %xmm0, %k0 3762 ; CHECK-NEXT: kmovw %k0, %eax 3763 ; CHECK-NEXT: retq 3764 %res = call i32 
@llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8) 3765 ret i32 %res 3766 } 3767 3768 define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { 3769 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae: 3770 ; CHECK: ## %bb.0: 3771 ; CHECK-NEXT: vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0 3772 ; CHECK-NEXT: kmovw %k0, %eax 3773 ; CHECK-NEXT: retq 3774 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8) 3775 ret i32 %res 3776 } 3777 3778 define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) { 3779 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq: 3780 ; CHECK: ## %bb.0: 3781 ; CHECK-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 3782 ; CHECK-NEXT: kmovw %k0, %eax 3783 ; CHECK-NEXT: retq 3784 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4) 3785 ret i32 %res 3786 } 3787 3788 define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) { 3789 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq: 3790 ; CHECK: ## %bb.0: 3791 ; CHECK-NEXT: vcmpeq_uqsd %xmm1, %xmm0, %k0 3792 ; CHECK-NEXT: kmovw %k0, %eax 3793 ; CHECK-NEXT: retq 3794 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4) 3795 ret i32 %res 3796 } 3797 3798 define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { 3799 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae: 3800 ; CHECK: ## %bb.0: 3801 ; CHECK-NEXT: vcmpltsd {sae}, %xmm1, %xmm0, %k0 3802 ; CHECK-NEXT: kmovw %k0, %eax 3803 ; CHECK-NEXT: retq 3804 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8) 3805 ret i32 %res 3806 } 3807 3808 define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { 3809 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae: 3810 ; CHECK: ## %bb.0: 3811 ; CHECK-NEXT: vcmpngesd {sae}, %xmm1, %xmm0, %k0 3812 ; CHECK-NEXT: kmovw %k0, %eax 3813 ; CHECK-NEXT: retq 3814 %res = call i32 
@llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8) 3815 ret i32 %res 3816 } 3817 3818 define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) { 3819 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt: 3820 ; CHECK: ## %bb.0: 3821 ; CHECK-NEXT: vcmpltsd %xmm1, %xmm0, %k0 3822 ; CHECK-NEXT: kmovw %k0, %eax 3823 ; CHECK-NEXT: retq 3824 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4) 3825 ret i32 %res 3826 } 3827 3828 define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) { 3829 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt: 3830 ; CHECK: ## %bb.0: 3831 ; CHECK-NEXT: vcmpngesd %xmm1, %xmm0, %k0 3832 ; CHECK-NEXT: kmovw %k0, %eax 3833 ; CHECK-NEXT: retq 3834 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4) 3835 ret i32 %res 3836 } 3837 3838 declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32) 3839 3840 define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) { 3841 ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt: 3842 ; CHECK: ## %bb.0: 3843 ; CHECK-NEXT: vcmpngess %xmm1, %xmm0, %k0 3844 ; CHECK-NEXT: kmovw %k0, %eax 3845 ; CHECK-NEXT: retq 3846 %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4) 3847 ret i32 %res 3848 } 3849 3850 declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32) 3851 3852 declare <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32>, <16 x i32>) 3853 3854 define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 3855 ; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_512: 3856 ; CHECK: ## %bb.0: 3857 ; CHECK-NEXT: vprolvd %zmm1, %zmm0, %zmm3 3858 ; CHECK-NEXT: kmovw %edi, %k1 3859 ; CHECK-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} 3860 ; CHECK-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} 3861 ; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 3862 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, 
%zmm0 3863 ; CHECK-NEXT: retq 3864 %1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1) 3865 %2 = bitcast i16 %x3 to <16 x i1> 3866 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 3867 %4 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1) 3868 %5 = bitcast i16 %x3 to <16 x i1> 3869 %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer 3870 %7 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1) 3871 %res3 = add <16 x i32> %3, %6 3872 %res4 = add <16 x i32> %res3, %7 3873 ret <16 x i32> %res4 3874 } 3875 3876 declare <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64>, <8 x i64>) 3877 3878 define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 3879 ; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_512: 3880 ; CHECK: ## %bb.0: 3881 ; CHECK-NEXT: vprolvq %zmm1, %zmm0, %zmm3 3882 ; CHECK-NEXT: kmovw %edi, %k1 3883 ; CHECK-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} 3884 ; CHECK-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} 3885 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 3886 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 3887 ; CHECK-NEXT: retq 3888 %1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1) 3889 %2 = bitcast i8 %x3 to <8 x i1> 3890 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 3891 %4 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1) 3892 %5 = bitcast i8 %x3 to <8 x i1> 3893 %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer 3894 %7 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1) 3895 %res3 = add <8 x i64> %3, %6 3896 %res4 = add <8 x i64> %res3, %7 3897 ret <8 x i64> %res4 3898 } 3899 3900 declare <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32>, <16 x i32>) 3901 3902 define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 3903 ; CHECK-LABEL: 
test_int_x86_avx512_mask_prorv_d_512: 3904 ; CHECK: ## %bb.0: 3905 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3 3906 ; CHECK-NEXT: kmovw %edi, %k1 3907 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} 3908 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} 3909 ; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 3910 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 3911 ; CHECK-NEXT: retq 3912 %1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1) 3913 %2 = bitcast i16 %x3 to <16 x i1> 3914 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 3915 %4 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1) 3916 %5 = bitcast i16 %x3 to <16 x i1> 3917 %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer 3918 %7 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1) 3919 %res3 = add <16 x i32> %3, %6 3920 %res4 = add <16 x i32> %res3, %7 3921 ret <16 x i32> %res4 3922 } 3923 3924 declare <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64>, <8 x i64>) 3925 3926 define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 3927 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_512: 3928 ; CHECK: ## %bb.0: 3929 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3 3930 ; CHECK-NEXT: kmovw %edi, %k1 3931 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} 3932 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} 3933 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 3934 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 3935 ; CHECK-NEXT: retq 3936 %1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1) 3937 %2 = bitcast i8 %x3 to <8 x i1> 3938 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 3939 %4 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1) 3940 %5 = bitcast i8 %x3 to <8 x i1> 3941 %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer 3942 %7 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1) 3943 
%res3 = add <8 x i64> %3, %6 3944 %res4 = add <8 x i64> %res3, %7 3945 ret <8 x i64> %res4 3946 } 3947 3948 declare <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32>, i32) 3949 3950 define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { 3951 ; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_512: 3952 ; CHECK: ## %bb.0: 3953 ; CHECK-NEXT: vprold $3, %zmm0, %zmm2 3954 ; CHECK-NEXT: kmovw %esi, %k1 3955 ; CHECK-NEXT: vprold $3, %zmm0, %zmm1 {%k1} 3956 ; CHECK-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} 3957 ; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm0 3958 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 3959 ; CHECK-NEXT: retq 3960 %1 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3) 3961 %2 = bitcast i16 %x3 to <16 x i1> 3962 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 3963 %4 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3) 3964 %5 = bitcast i16 %x3 to <16 x i1> 3965 %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer 3966 %7 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3) 3967 %res3 = add <16 x i32> %3, %6 3968 %res4 = add <16 x i32> %res3, %7 3969 ret <16 x i32> %res4 3970 } 3971 3972 declare <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64>, i32) 3973 3974 define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { 3975 ; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_512: 3976 ; CHECK: ## %bb.0: 3977 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm2 3978 ; CHECK-NEXT: kmovw %esi, %k1 3979 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} 3980 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} 3981 ; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0 3982 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 3983 ; CHECK-NEXT: retq 3984 %1 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3) 3985 %2 = bitcast i8 %x3 to <8 x i1> 3986 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 3987 %4 = call <8 x i64> 
@llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3) 3988 %5 = bitcast i8 %x3 to <8 x i1> 3989 %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer 3990 %7 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3) 3991 %res3 = add <8 x i64> %3, %6 3992 %res4 = add <8 x i64> %res3, %7 3993 ret <8 x i64> %res4 3994 } 3995 3996 declare <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32>, i32) 3997 3998 define <16 x i32>@test_int_x86_avx512_mask_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { 3999 ; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_512: 4000 ; CHECK: ## %bb.0: 4001 ; CHECK-NEXT: vprord $3, %zmm0, %zmm2 4002 ; CHECK-NEXT: kmovw %esi, %k1 4003 ; CHECK-NEXT: vprord $3, %zmm0, %zmm1 {%k1} 4004 ; CHECK-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} 4005 ; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm0 4006 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 4007 ; CHECK-NEXT: retq 4008 %1 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3) 4009 %2 = bitcast i16 %x3 to <16 x i1> 4010 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 4011 %4 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3) 4012 %5 = bitcast i16 %x3 to <16 x i1> 4013 %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer 4014 %7 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3) 4015 %res3 = add <16 x i32> %3, %6 4016 %res4 = add <16 x i32> %res3, %7 4017 ret <16 x i32> %res4 4018 } 4019 4020 declare <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64>, i32) 4021 4022 define <8 x i64>@test_int_x86_avx512_mask_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { 4023 ; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_512: 4024 ; CHECK: ## %bb.0: 4025 ; CHECK-NEXT: vprorq $3, %zmm0, %zmm2 4026 ; CHECK-NEXT: kmovw %esi, %k1 4027 ; CHECK-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} 4028 ; CHECK-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} 4029 ; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0 4030 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 4031 ; 
CHECK-NEXT: retq 4032 %1 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) 4033 %2 = bitcast i8 %x3 to <8 x i1> 4034 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 4035 %4 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) 4036 %5 = bitcast i8 %x3 to <8 x i1> 4037 %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer 4038 %7 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) 4039 %res3 = add <8 x i64> %3, %6 4040 %res4 = add <8 x i64> %res3, %7 4041 ret <8 x i64> %res4 4042 } 4043 4044 declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>) 4045 4046 define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { 4047 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512: 4048 ; CHECK: ## %bb.0: 4049 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3 4050 ; CHECK-NEXT: kmovw %edi, %k1 4051 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1} 4052 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} 4053 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 4054 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 4055 ; CHECK-NEXT: retq 4056 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1) 4057 %2 = bitcast i8 %x3 to <8 x i1> 4058 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2 4059 %4 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1) 4060 %5 = bitcast i8 %x3 to <8 x i1> 4061 %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> zeroinitializer 4062 %7 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1) 4063 %res3 = fadd <8 x double> %3, %6 4064 %res4 = fadd <8 x double> %res3, %7 4065 ret <8 x double> %res4 4066 } 4067 4068 declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>) 4069 4070 define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4071 ; CHECK-LABEL: 
test_int_x86_avx512_mask_permvar_di_512: 4072 ; CHECK: ## %bb.0: 4073 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3 4074 ; CHECK-NEXT: kmovw %edi, %k1 4075 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1} 4076 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} 4077 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 4078 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4079 ; CHECK-NEXT: retq 4080 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1) 4081 %2 = bitcast i8 %x3 to <8 x i1> 4082 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 4083 %4 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1) 4084 %5 = bitcast i8 %x3 to <8 x i1> 4085 %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer 4086 %7 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1) 4087 %res3 = add <8 x i64> %3, %6 4088 %res4 = add <8 x i64> %res3, %7 4089 ret <8 x i64> %res4 4090 } 4091 4092 declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>) 4093 4094 define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { 4095 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512: 4096 ; CHECK: ## %bb.0: 4097 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3 4098 ; CHECK-NEXT: kmovw %edi, %k1 4099 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1} 4100 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} 4101 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 4102 ; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0 4103 ; CHECK-NEXT: retq 4104 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1) 4105 %2 = bitcast i16 %x3 to <16 x i1> 4106 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2 4107 %4 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1) 4108 %5 = bitcast i16 %x3 to <16 x i1> 4109 %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> zeroinitializer 4110 %7 = call <16 x float> 
@llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1) 4111 %res3 = fadd <16 x float> %3, %6 4112 %res4 = fadd <16 x float> %res3, %7 4113 ret <16 x float> %res4 4114 } 4115 4116 declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>) 4117 4118 define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4119 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_512: 4120 ; CHECK: ## %bb.0: 4121 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3 4122 ; CHECK-NEXT: kmovw %edi, %k1 4123 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1} 4124 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} 4125 ; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 4126 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4127 ; CHECK-NEXT: retq 4128 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1) 4129 %2 = bitcast i16 %x3 to <16 x i1> 4130 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 4131 %4 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1) 4132 %5 = bitcast i16 %x3 to <16 x i1> 4133 %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer 4134 %7 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1) 4135 %res3 = add <16 x i32> %3, %6 4136 %res4 = add <16 x i32> %res3, %7 4137 ret <16 x i32> %res4 4138 } 4139 4140 declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32) 4141 4142 define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) { 4143 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512: 4144 ; CHECK: ## %bb.0: 4145 ; CHECK-NEXT: kmovw %edi, %k1 4146 ; CHECK-NEXT: vmovapd %zmm0, %zmm3 4147 ; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1} 4148 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 4149 ; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z} 4150 ; CHECK-NEXT: vaddpd %zmm4, %zmm3, 
%zmm3 4151 ; CHECK-NEXT: vfixupimmpd $3, {sae}, %zmm2, %zmm1, %zmm0 4152 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 4153 ; CHECK-NEXT: retq 4154 %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4) 4155 %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4) 4156 %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8) 4157 %res3 = fadd <8 x double> %res, %res1 4158 %res4 = fadd <8 x double> %res3, %res2 4159 ret <8 x double> %res4 4160 } 4161 4162 define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512_load(<8 x double> %x0, <8 x double> %x1, <8 x i64>* %x2ptr) { 4163 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512_load: 4164 ; CHECK: ## %bb.0: 4165 ; CHECK-NEXT: vfixupimmpd $3, (%rdi), %zmm1, %zmm0 4166 ; CHECK-NEXT: retq 4167 %x2 = load <8 x i64>, <8 x i64>* %x2ptr 4168 %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 4) 4169 ret <8 x double> %res 4170 } 4171 4172 declare <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32) 4173 4174 define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) { 4175 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512: 4176 ; CHECK: ## %bb.0: 4177 ; CHECK-NEXT: kmovw %edi, %k1 4178 ; CHECK-NEXT: vmovapd %zmm0, %zmm3 4179 ; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z} 4180 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 4181 ; CHECK-NEXT: vmovapd %zmm0, %zmm5 4182 ; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z} 4183 ; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm3 4184 ; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0 4185 ; CHECK-NEXT: vaddpd %zmm0, 
%zmm3, %zmm0 4186 ; CHECK-NEXT: retq 4187 %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4) 4188 %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4) 4189 %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8) 4190 %res3 = fadd <8 x double> %res, %res1 4191 %res4 = fadd <8 x double> %res3, %res2 4192 ret <8 x double> %res4 4193 } 4194 4195 declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32) 4196 4197 define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { 4198 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ss: 4199 ; CHECK: ## %bb.0: 4200 ; CHECK-NEXT: kmovw %edi, %k1 4201 ; CHECK-NEXT: vmovaps %xmm0, %xmm3 4202 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} 4203 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 4204 ; CHECK-NEXT: vmovaps %xmm0, %xmm5 4205 ; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1} 4206 ; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm3 4207 ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 4208 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 4209 ; CHECK-NEXT: retq 4210 %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) 4211 %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4) 4212 %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8) 4213 %res3 = fadd <4 x float> %res, %res1 4214 %res4 = fadd <4 x float> %res3, %res2 4215 ret <4 x float> %res4 4216 } 4217 4218 declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x 
float>, <4 x i32>, i32, i8, i32) 4219 4220 define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { 4221 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ss: 4222 ; CHECK: ## %bb.0: 4223 ; CHECK-NEXT: vmovaps %xmm0, %xmm3 4224 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 4225 ; CHECK-NEXT: kmovw %edi, %k1 4226 ; CHECK-NEXT: vmovaps %xmm0, %xmm4 4227 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4 {%k1} {z} 4228 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 4229 ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} 4230 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 4231 ; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 4232 ; CHECK-NEXT: retq 4233 %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) 4234 %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8) 4235 %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 4) 4236 %res3 = fadd <4 x float> %res, %res1 4237 %res4 = fadd <4 x float> %res3, %res2 4238 ret <4 x float> %res4 4239 } 4240 4241 declare <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32) 4242 4243 define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) { 4244 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512: 4245 ; CHECK: ## %bb.0: 4246 ; CHECK-NEXT: kmovw %edi, %k1 4247 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 4248 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} 4249 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 4250 ; CHECK-NEXT: vmovaps %zmm0, %zmm5 4251 ; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1} 4252 ; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm3 4253 ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 4254 ; 
CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 4255 ; CHECK-NEXT: retq 4256 %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) 4257 %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4) 4258 %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8) 4259 %res3 = fadd <16 x float> %res, %res1 4260 %res4 = fadd <16 x float> %res3, %res2 4261 ret <16 x float> %res4 4262 } 4263 4264 define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512_load(<16 x float> %x0, <16 x float> %x1, <16 x i32>* %x2ptr) { 4265 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512_load: 4266 ; CHECK: ## %bb.0: 4267 ; CHECK-NEXT: vfixupimmps $5, (%rdi), %zmm1, %zmm0 4268 ; CHECK-NEXT: retq 4269 %x2 = load <16 x i32>, <16 x i32>* %x2ptr 4270 %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4) 4271 ret <16 x float> %res 4272 } 4273 4274 declare <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32) 4275 4276 define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) { 4277 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512: 4278 ; CHECK: ## %bb.0: 4279 ; CHECK-NEXT: kmovw %edi, %k1 4280 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 4281 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 4282 ; CHECK-NEXT: vmovaps %zmm0, %zmm4 4283 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4 {%k1} {z} 4284 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 4285 ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} 4286 ; CHECK-NEXT: vaddps %zmm0, %zmm4, %zmm0 4287 ; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0 4288 ; CHECK-NEXT: retq 4289 %res = call <16 x 
float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) 4290 %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 8) 4291 %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4) 4292 %res3 = fadd <16 x float> %res, %res1 4293 %res4 = fadd <16 x float> %res3, %res2 4294 ret <16 x float> %res4 4295 } 4296 4297 declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32) 4298 4299 define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { 4300 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_sd: 4301 ; CHECK: ## %bb.0: 4302 ; CHECK-NEXT: vmovapd %xmm0, %xmm3 4303 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 4304 ; CHECK-NEXT: kmovw %edi, %k1 4305 ; CHECK-NEXT: vmovapd %xmm0, %xmm4 4306 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4 {%k1} 4307 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 4308 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} 4309 ; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0 4310 ; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 4311 ; CHECK-NEXT: retq 4312 %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) 4313 %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) 4314 %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 -1, i32 4) 4315 %res3 = fadd <2 x double> %res, %res1 4316 %res4 = fadd <2 x double> %res3, %res2 4317 ret <2 x double> %res4 4318 } 4319 4320 declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32) 
4321 4322 define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { 4323 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_sd: 4324 ; CHECK: ## %bb.0: 4325 ; CHECK-NEXT: kmovw %edi, %k1 4326 ; CHECK-NEXT: vmovapd %xmm0, %xmm3 4327 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} 4328 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 4329 ; CHECK-NEXT: vmovapd %xmm0, %xmm5 4330 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z} 4331 ; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm3 4332 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} 4333 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 4334 ; CHECK-NEXT: retq 4335 %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) 4336 %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) 4337 %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8) 4338 %res3 = fadd <2 x double> %res, %res1 4339 %res4 = fadd <2 x double> %res3, %res2 4340 ret <2 x double> %res4 4341 } 4342 4343 declare double @llvm.fma.f64(double, double, double) #1 4344 declare double @llvm.x86.avx512.vfmadd.f64(double, double, double, i32) #0 4345 4346 define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ 4347 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_sd: 4348 ; CHECK: ## %bb.0: 4349 ; CHECK-NEXT: vmovapd %xmm0, %xmm3 4350 ; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm3 = (xmm1 * xmm3) + xmm2 4351 ; CHECK-NEXT: kmovw %edi, %k1 4352 ; CHECK-NEXT: vmovapd %xmm0, %xmm4 4353 ; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm4 = (xmm1 * xmm4) + xmm2 4354 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm3 4355 ; CHECK-NEXT: vmovapd %xmm0, %xmm4 4356 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, 
%xmm1, %xmm4 4357 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} 4358 ; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0 4359 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 4360 ; CHECK-NEXT: retq 4361 %1 = extractelement <2 x double> %x0, i64 0 4362 %2 = extractelement <2 x double> %x1, i64 0 4363 %3 = extractelement <2 x double> %x2, i64 0 4364 %4 = call double @llvm.fma.f64(double %1, double %2, double %3) 4365 %5 = insertelement <2 x double> %x0, double %4, i64 0 4366 %6 = extractelement <2 x double> %x0, i64 0 4367 %7 = extractelement <2 x double> %x1, i64 0 4368 %8 = extractelement <2 x double> %x2, i64 0 4369 %9 = call double @llvm.fma.f64(double %6, double %7, double %8) 4370 %10 = bitcast i8 %x3 to <8 x i1> 4371 %11 = extractelement <8 x i1> %10, i64 0 4372 %12 = select i1 %11, double %9, double %6 4373 %13 = insertelement <2 x double> %x0, double %12, i64 0 4374 %14 = extractelement <2 x double> %x0, i64 0 4375 %15 = extractelement <2 x double> %x1, i64 0 4376 %16 = extractelement <2 x double> %x2, i64 0 4377 %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 3) 4378 %18 = insertelement <2 x double> %x0, double %17, i64 0 4379 %19 = extractelement <2 x double> %x0, i64 0 4380 %20 = extractelement <2 x double> %x1, i64 0 4381 %21 = extractelement <2 x double> %x2, i64 0 4382 %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 3) 4383 %23 = bitcast i8 %x3 to <8 x i1> 4384 %24 = extractelement <8 x i1> %23, i64 0 4385 %25 = select i1 %24, double %22, double %19 4386 %26 = insertelement <2 x double> %x0, double %25, i64 0 4387 %res4 = fadd <2 x double> %5, %13 4388 %res5 = fadd <2 x double> %18, %26 4389 %res6 = fadd <2 x double> %res4, %res5 4390 ret <2 x double> %res6 4391 } 4392 4393 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ 4394 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss: 4395 ; CHECK: ## %bb.0: 4396 ; 
CHECK-NEXT: vmovaps %xmm0, %xmm3 4397 ; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm3 = (xmm1 * xmm3) + xmm2 4398 ; CHECK-NEXT: kmovw %edi, %k1 4399 ; CHECK-NEXT: vmovaps %xmm0, %xmm4 4400 ; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm4 = (xmm1 * xmm4) + xmm2 4401 ; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm3 4402 ; CHECK-NEXT: vmovaps %xmm0, %xmm4 4403 ; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 4404 ; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} 4405 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 4406 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 4407 ; CHECK-NEXT: retq 4408 %1 = extractelement <4 x float> %x0, i64 0 4409 %2 = extractelement <4 x float> %x1, i64 0 4410 %3 = extractelement <4 x float> %x2, i64 0 4411 %4 = call float @llvm.fma.f32(float %1, float %2, float %3) 4412 %5 = insertelement <4 x float> %x0, float %4, i64 0 4413 %6 = extractelement <4 x float> %x0, i64 0 4414 %7 = extractelement <4 x float> %x1, i64 0 4415 %8 = extractelement <4 x float> %x2, i64 0 4416 %9 = call float @llvm.fma.f32(float %6, float %7, float %8) 4417 %10 = bitcast i8 %x3 to <8 x i1> 4418 %11 = extractelement <8 x i1> %10, i64 0 4419 %12 = select i1 %11, float %9, float %6 4420 %13 = insertelement <4 x float> %x0, float %12, i64 0 4421 %14 = extractelement <4 x float> %x0, i64 0 4422 %15 = extractelement <4 x float> %x1, i64 0 4423 %16 = extractelement <4 x float> %x2, i64 0 4424 %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 3) 4425 %18 = insertelement <4 x float> %x0, float %17, i64 0 4426 %19 = extractelement <4 x float> %x0, i64 0 4427 %20 = extractelement <4 x float> %x1, i64 0 4428 %21 = extractelement <4 x float> %x2, i64 0 4429 %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 3) 4430 %23 = bitcast i8 %x3 to <8 x i1> 4431 %24 = extractelement <8 x i1> %23, i64 0 4432 %25 = select i1 %24, float %22, float %19 4433 %26 = insertelement <4 x float> %x0, float %25, i64 0 4434 %res4 = fadd <4 x float> %5, %13 
4435 %res5 = fadd <4 x float> %18, %26 4436 %res6 = fadd <4 x float> %res4, %res5 4437 ret <4 x float> %res6 4438 } 4439 4440 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ 4441 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_sd: 4442 ; CHECK: ## %bb.0: 4443 ; CHECK-NEXT: kmovw %edi, %k1 4444 ; CHECK-NEXT: vmovapd %xmm0, %xmm3 4445 ; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm3 = (xmm1 * xmm3) + xmm2 4446 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} 4447 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 4448 ; CHECK-NEXT: retq 4449 %1 = extractelement <2 x double> %x0, i64 0 4450 %2 = extractelement <2 x double> %x1, i64 0 4451 %3 = extractelement <2 x double> %x2, i64 0 4452 %4 = call double @llvm.fma.f64(double %1, double %2, double %3) 4453 %5 = bitcast i8 %x3 to <8 x i1> 4454 %6 = extractelement <8 x i1> %5, i64 0 4455 %7 = select i1 %6, double %4, double 0.000000e+00 4456 %8 = insertelement <2 x double> %x0, double %7, i64 0 4457 %9 = extractelement <2 x double> %x0, i64 0 4458 %10 = extractelement <2 x double> %x1, i64 0 4459 %11 = extractelement <2 x double> %x2, i64 0 4460 %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 3) 4461 %13 = bitcast i8 %x3 to <8 x i1> 4462 %14 = extractelement <8 x i1> %13, i64 0 4463 %15 = select i1 %14, double %12, double 0.000000e+00 4464 %16 = insertelement <2 x double> %x0, double %15, i64 0 4465 %res2 = fadd <2 x double> %8, %16 4466 ret <2 x double> %res2 4467 } 4468 4469 declare float @llvm.fma.f32(float, float, float) #1 4470 declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32) #0 4471 4472 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ 4473 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss: 4474 ; CHECK: ## %bb.0: 4475 ; CHECK-NEXT: kmovw %edi, %k1 4476 ; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + 
xmm2 4477 ; CHECK-NEXT: retq 4478 %1 = extractelement <4 x float> %x0, i64 0 4479 %2 = extractelement <4 x float> %x1, i64 0 4480 %3 = extractelement <4 x float> %x2, i64 0 4481 %4 = call float @llvm.fma.f32(float %1, float %2, float %3) 4482 %5 = bitcast i8 %x3 to <8 x i1> 4483 %6 = extractelement <8 x i1> %5, i64 0 4484 %7 = select i1 %6, float %4, float 0.000000e+00 4485 %8 = insertelement <4 x float> %x0, float %7, i64 0 4486 %9 = extractelement <4 x float> %x0, i64 0 4487 %10 = extractelement <4 x float> %x1, i64 0 4488 %11 = extractelement <4 x float> %x2, i64 0 4489 %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 3) 4490 %13 = bitcast i8 %x3 to <8 x i1> 4491 %14 = extractelement <8 x i1> %13, i64 0 4492 %15 = select i1 %14, float %12, float 0.000000e+00 4493 %16 = insertelement <4 x float> %x0, float %15, i64 0 4494 %res2 = fadd <4 x float> %8, %16 4495 ret <4 x float> %8 4496 } 4497 4498 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ 4499 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd: 4500 ; CHECK: ## %bb.0: 4501 ; CHECK-NEXT: vmovapd %xmm2, %xmm3 4502 ; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm3 = (xmm0 * xmm1) + xmm3 4503 ; CHECK-NEXT: kmovw %edi, %k1 4504 ; CHECK-NEXT: vmovapd %xmm2, %xmm4 4505 ; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm4 = (xmm0 * xmm1) + xmm4 4506 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm3 4507 ; CHECK-NEXT: vmovapd %xmm2, %xmm4 4508 ; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 4509 ; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 4510 ; CHECK-NEXT: vaddpd %xmm2, %xmm4, %xmm0 4511 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 4512 ; CHECK-NEXT: retq 4513 %1 = extractelement <2 x double> %x0, i64 0 4514 %2 = extractelement <2 x double> %x1, i64 0 4515 %3 = extractelement <2 x double> %x2, i64 0 4516 %4 = call double @llvm.fma.f64(double %1, double %2, double %3) 4517 %5 = insertelement <2 x double> %x2, double 
%4, i64 0 4518 %6 = extractelement <2 x double> %x0, i64 0 4519 %7 = extractelement <2 x double> %x1, i64 0 4520 %8 = extractelement <2 x double> %x2, i64 0 4521 %9 = call double @llvm.fma.f64(double %6, double %7, double %8) 4522 %10 = bitcast i8 %x3 to <8 x i1> 4523 %11 = extractelement <8 x i1> %10, i64 0 4524 %12 = select i1 %11, double %9, double %8 4525 %13 = insertelement <2 x double> %x2, double %12, i64 0 4526 %14 = extractelement <2 x double> %x0, i64 0 4527 %15 = extractelement <2 x double> %x1, i64 0 4528 %16 = extractelement <2 x double> %x2, i64 0 4529 %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 3) 4530 %18 = insertelement <2 x double> %x2, double %17, i64 0 4531 %19 = extractelement <2 x double> %x0, i64 0 4532 %20 = extractelement <2 x double> %x1, i64 0 4533 %21 = extractelement <2 x double> %x2, i64 0 4534 %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 3) 4535 %23 = bitcast i8 %x3 to <8 x i1> 4536 %24 = extractelement <8 x i1> %23, i64 0 4537 %25 = select i1 %24, double %22, double %21 4538 %26 = insertelement <2 x double> %x2, double %25, i64 0 4539 %res4 = fadd <2 x double> %5, %13 4540 %res5 = fadd <2 x double> %18, %26 4541 %res6 = fadd <2 x double> %res4, %res5 4542 ret <2 x double> %res6 4543 } 4544 4545 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ 4546 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss: 4547 ; CHECK: ## %bb.0: 4548 ; CHECK-NEXT: vmovaps %xmm2, %xmm3 4549 ; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm3 = (xmm0 * xmm1) + xmm3 4550 ; CHECK-NEXT: kmovw %edi, %k1 4551 ; CHECK-NEXT: vmovaps %xmm2, %xmm4 4552 ; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm4 = (xmm0 * xmm1) + xmm4 4553 ; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm3 4554 ; CHECK-NEXT: vmovaps %xmm2, %xmm4 4555 ; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 4556 ; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 
4557 ; CHECK-NEXT: vaddps %xmm2, %xmm4, %xmm0 4558 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 4559 ; CHECK-NEXT: retq 4560 %1 = extractelement <4 x float> %x0, i64 0 4561 %2 = extractelement <4 x float> %x1, i64 0 4562 %3 = extractelement <4 x float> %x2, i64 0 4563 %4 = call float @llvm.fma.f32(float %1, float %2, float %3) 4564 %5 = insertelement <4 x float> %x2, float %4, i64 0 4565 %6 = extractelement <4 x float> %x0, i64 0 4566 %7 = extractelement <4 x float> %x1, i64 0 4567 %8 = extractelement <4 x float> %x2, i64 0 4568 %9 = call float @llvm.fma.f32(float %6, float %7, float %8) 4569 %10 = bitcast i8 %x3 to <8 x i1> 4570 %11 = extractelement <8 x i1> %10, i64 0 4571 %12 = select i1 %11, float %9, float %8 4572 %13 = insertelement <4 x float> %x2, float %12, i64 0 4573 %14 = extractelement <4 x float> %x0, i64 0 4574 %15 = extractelement <4 x float> %x1, i64 0 4575 %16 = extractelement <4 x float> %x2, i64 0 4576 %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 3) 4577 %18 = insertelement <4 x float> %x2, float %17, i64 0 4578 %19 = extractelement <4 x float> %x0, i64 0 4579 %20 = extractelement <4 x float> %x1, i64 0 4580 %21 = extractelement <4 x float> %x2, i64 0 4581 %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 3) 4582 %23 = bitcast i8 %x3 to <8 x i1> 4583 %24 = extractelement <8 x i1> %23, i64 0 4584 %25 = select i1 %24, float %22, float %21 4585 %26 = insertelement <4 x float> %x2, float %25, i64 0 4586 %res4 = fadd <4 x float> %5, %13 4587 %res5 = fadd <4 x float> %18, %26 4588 %res6 = fadd <4 x float> %res4, %res5 4589 ret <4 x float> %res6 4590 } 4591 4592 define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) { 4593 ; CHECK-LABEL: fmadd_ss_mask_memfold: 4594 ; CHECK: ## %bb.0: 4595 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 4596 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 4597 ; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + xmm0 4598 
; CHECK-NEXT: kmovw %edx, %k1 4599 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} 4600 ; CHECK-NEXT: vmovss %xmm0, (%rdi) 4601 ; CHECK-NEXT: retq 4602 %a.val = load float, float* %a 4603 %av0 = insertelement <4 x float> undef, float %a.val, i32 0 4604 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 4605 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2 4606 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3 4607 4608 %b.val = load float, float* %b 4609 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0 4610 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1 4611 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2 4612 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3 4613 %1 = extractelement <4 x float> %av, i64 0 4614 %2 = extractelement <4 x float> %bv, i64 0 4615 %3 = extractelement <4 x float> %av, i64 0 4616 %4 = call float @llvm.fma.f32(float %1, float %2, float %3) 4617 %5 = bitcast i8 %c to <8 x i1> 4618 %6 = extractelement <8 x i1> %5, i64 0 4619 %7 = select i1 %6, float %4, float %1 4620 %8 = insertelement <4 x float> %av, float %7, i64 0 4621 %sr = extractelement <4 x float> %8, i32 0 4622 store float %sr, float* %a 4623 ret void 4624 } 4625 4626 define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) { 4627 ; CHECK-LABEL: fmadd_ss_maskz_memfold: 4628 ; CHECK: ## %bb.0: 4629 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 4630 ; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0 4631 ; CHECK-NEXT: kmovw %edx, %k1 4632 ; CHECK-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} 4633 ; CHECK-NEXT: vmovss %xmm0, (%rdi) 4634 ; CHECK-NEXT: retq 4635 %a.val = load float, float* %a 4636 %av0 = insertelement <4 x float> undef, float %a.val, i32 0 4637 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 4638 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2 4639 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 
3 4640 4641 %b.val = load float, float* %b 4642 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0 4643 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1 4644 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2 4645 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3 4646 %1 = extractelement <4 x float> %av, i64 0 4647 %2 = extractelement <4 x float> %bv, i64 0 4648 %3 = extractelement <4 x float> %av, i64 0 4649 %4 = call float @llvm.fma.f32(float %1, float %2, float %3) 4650 %5 = bitcast i8 %c to <8 x i1> 4651 %6 = extractelement <8 x i1> %5, i64 0 4652 %7 = select i1 %6, float %4, float 0.000000e+00 4653 %8 = insertelement <4 x float> %av, float %7, i64 0 4654 %sr = extractelement <4 x float> %8, i32 0 4655 store float %sr, float* %a 4656 ret void 4657 } 4658 4659 define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) { 4660 ; CHECK-LABEL: fmadd_sd_mask_memfold: 4661 ; CHECK: ## %bb.0: 4662 ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 4663 ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 4664 ; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + xmm0 4665 ; CHECK-NEXT: kmovw %edx, %k1 4666 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} 4667 ; CHECK-NEXT: vmovsd %xmm0, (%rdi) 4668 ; CHECK-NEXT: retq 4669 %a.val = load double, double* %a 4670 %av0 = insertelement <2 x double> undef, double %a.val, i32 0 4671 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 4672 4673 %b.val = load double, double* %b 4674 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0 4675 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1 4676 %1 = extractelement <2 x double> %av, i64 0 4677 %2 = extractelement <2 x double> %bv, i64 0 4678 %3 = extractelement <2 x double> %av, i64 0 4679 %4 = call double @llvm.fma.f64(double %1, double %2, double %3) 4680 %5 = bitcast i8 %c to <8 x i1> 4681 %6 = extractelement <8 x i1> %5, i64 0 4682 %7 = select i1 %6, double %4, double %1 4683 %8 = 
insertelement <2 x double> %av, double %7, i64 0 4684 %sr = extractelement <2 x double> %8, i32 0 4685 store double %sr, double* %a 4686 ret void 4687 } 4688 4689 define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) { 4690 ; CHECK-LABEL: fmadd_sd_maskz_memfold: 4691 ; CHECK: ## %bb.0: 4692 ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 4693 ; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0 4694 ; CHECK-NEXT: kmovw %edx, %k1 4695 ; CHECK-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} 4696 ; CHECK-NEXT: vmovsd %xmm0, (%rdi) 4697 ; CHECK-NEXT: retq 4698 %a.val = load double, double* %a 4699 %av0 = insertelement <2 x double> undef, double %a.val, i32 0 4700 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 4701 4702 %b.val = load double, double* %b 4703 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0 4704 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1 4705 %1 = extractelement <2 x double> %av, i64 0 4706 %2 = extractelement <2 x double> %bv, i64 0 4707 %3 = extractelement <2 x double> %av, i64 0 4708 %4 = call double @llvm.fma.f64(double %1, double %2, double %3) 4709 %5 = bitcast i8 %c to <8 x i1> 4710 %6 = extractelement <8 x i1> %5, i64 0 4711 %7 = select i1 %6, double %4, double 0.000000e+00 4712 %8 = insertelement <2 x double> %av, double %7, i64 0 4713 %sr = extractelement <2 x double> %8, i32 0 4714 store double %sr, double* %a 4715 ret void 4716 } 4717 4718 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ 4719 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd: 4720 ; CHECK: ## %bb.0: 4721 ; CHECK-NEXT: vmovapd %xmm2, %xmm3 4722 ; CHECK-NEXT: vfmsub231sd {{.*#+}} xmm3 = (xmm0 * xmm1) - xmm3 4723 ; CHECK-NEXT: kmovw %edi, %k1 4724 ; CHECK-NEXT: vmovapd %xmm2, %xmm4 4725 ; CHECK-NEXT: vfmsub231sd {{.*#+}} xmm4 = (xmm0 * xmm1) - xmm4 4726 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm3 4727 ; CHECK-NEXT: vmovapd %xmm2, 
%xmm4 4728 ; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 4729 ; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 4730 ; CHECK-NEXT: vaddpd %xmm2, %xmm4, %xmm0 4731 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 4732 ; CHECK-NEXT: retq 4733 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2 4734 %2 = extractelement <2 x double> %x0, i64 0 4735 %3 = extractelement <2 x double> %x1, i64 0 4736 %4 = extractelement <2 x double> %1, i64 0 4737 %5 = call double @llvm.fma.f64(double %2, double %3, double %4) 4738 %6 = extractelement <2 x double> %x2, i64 0 4739 %7 = insertelement <2 x double> %x2, double %5, i64 0 4740 %8 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2 4741 %9 = extractelement <2 x double> %x0, i64 0 4742 %10 = extractelement <2 x double> %x1, i64 0 4743 %11 = extractelement <2 x double> %8, i64 0 4744 %12 = call double @llvm.fma.f64(double %9, double %10, double %11) 4745 %13 = extractelement <2 x double> %x2, i64 0 4746 %14 = bitcast i8 %x3 to <8 x i1> 4747 %15 = extractelement <8 x i1> %14, i64 0 4748 %16 = select i1 %15, double %12, double %13 4749 %17 = insertelement <2 x double> %x2, double %16, i64 0 4750 %18 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2 4751 %19 = extractelement <2 x double> %x0, i64 0 4752 %20 = extractelement <2 x double> %x1, i64 0 4753 %21 = extractelement <2 x double> %18, i64 0 4754 %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 3) 4755 %23 = extractelement <2 x double> %x2, i64 0 4756 %24 = insertelement <2 x double> %x2, double %22, i64 0 4757 %25 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2 4758 %26 = extractelement <2 x double> %x0, i64 0 4759 %27 = extractelement <2 x double> %x1, i64 0 4760 %28 = extractelement <2 x double> %25, i64 0 4761 %29 = call double @llvm.x86.avx512.vfmadd.f64(double %26, double %27, double %28, i32 3) 4762 %30 = extractelement <2 x double> %x2, i64 0 
4763 %31 = bitcast i8 %x3 to <8 x i1> 4764 %32 = extractelement <8 x i1> %31, i64 0 4765 %33 = select i1 %32, double %29, double %30 4766 %34 = insertelement <2 x double> %x2, double %33, i64 0 4767 %res4 = fadd <2 x double> %7, %17 4768 %res5 = fadd <2 x double> %24, %34 4769 %res6 = fadd <2 x double> %res4, %res5 4770 ret <2 x double> %res6 4771 } 4772 4773 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ 4774 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss: 4775 ; CHECK: ## %bb.0: 4776 ; CHECK-NEXT: vmovaps %xmm2, %xmm3 4777 ; CHECK-NEXT: vfmsub231ss {{.*#+}} xmm3 = (xmm0 * xmm1) - xmm3 4778 ; CHECK-NEXT: kmovw %edi, %k1 4779 ; CHECK-NEXT: vmovaps %xmm2, %xmm4 4780 ; CHECK-NEXT: vfmsub231ss {{.*#+}} xmm4 = (xmm0 * xmm1) - xmm4 4781 ; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm3 4782 ; CHECK-NEXT: vmovaps %xmm2, %xmm4 4783 ; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 4784 ; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 4785 ; CHECK-NEXT: vaddps %xmm2, %xmm4, %xmm0 4786 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 4787 ; CHECK-NEXT: retq 4788 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2 4789 %2 = extractelement <4 x float> %x0, i64 0 4790 %3 = extractelement <4 x float> %x1, i64 0 4791 %4 = extractelement <4 x float> %1, i64 0 4792 %5 = call float @llvm.fma.f32(float %2, float %3, float %4) 4793 %6 = extractelement <4 x float> %x2, i64 0 4794 %7 = insertelement <4 x float> %x2, float %5, i64 0 4795 %8 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2 4796 %9 = extractelement <4 x float> %x0, i64 0 4797 %10 = extractelement <4 x float> %x1, i64 0 4798 %11 = extractelement <4 x float> %8, i64 0 4799 %12 = call float @llvm.fma.f32(float %9, float %10, float %11) 4800 %13 = extractelement <4 x float> %x2, i64 0 4801 %14 = bitcast i8 %x3 to <8 x i1> 4802 
%15 = extractelement <8 x i1> %14, i64 0 4803 %16 = select i1 %15, float %12, float %13 4804 %17 = insertelement <4 x float> %x2, float %16, i64 0 4805 %18 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2 4806 %19 = extractelement <4 x float> %x0, i64 0 4807 %20 = extractelement <4 x float> %x1, i64 0 4808 %21 = extractelement <4 x float> %18, i64 0 4809 %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 3) 4810 %23 = extractelement <4 x float> %x2, i64 0 4811 %24 = insertelement <4 x float> %x2, float %22, i64 0 4812 %25 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2 4813 %26 = extractelement <4 x float> %x0, i64 0 4814 %27 = extractelement <4 x float> %x1, i64 0 4815 %28 = extractelement <4 x float> %25, i64 0 4816 %29 = call float @llvm.x86.avx512.vfmadd.f32(float %26, float %27, float %28, i32 3) 4817 %30 = extractelement <4 x float> %x2, i64 0 4818 %31 = bitcast i8 %x3 to <8 x i1> 4819 %32 = extractelement <8 x i1> %31, i64 0 4820 %33 = select i1 %32, float %29, float %30 4821 %34 = insertelement <4 x float> %x2, float %33, i64 0 4822 %res4 = fadd <4 x float> %7, %17 4823 %res5 = fadd <4 x float> %24, %34 4824 %res6 = fadd <4 x float> %res4, %res5 4825 ret <4 x float> %res6 4826 } 4827 4828 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ 4829 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd: 4830 ; CHECK: ## %bb.0: 4831 ; CHECK-NEXT: vmovapd %xmm2, %xmm3 4832 ; CHECK-NEXT: vfnmsub231sd {{.*#+}} xmm3 = -(xmm0 * xmm1) - xmm3 4833 ; CHECK-NEXT: kmovw %edi, %k1 4834 ; CHECK-NEXT: vmovapd %xmm2, %xmm4 4835 ; CHECK-NEXT: vfnmsub231sd {{.*#+}} xmm4 = -(xmm0 * xmm1) - xmm4 4836 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm3 4837 ; CHECK-NEXT: vmovapd %xmm2, %xmm4 4838 ; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 4839 ; 
CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 4840 ; CHECK-NEXT: vaddpd %xmm2, %xmm4, %xmm0 4841 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 4842 ; CHECK-NEXT: retq 4843 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x0 4844 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2 4845 %3 = extractelement <2 x double> %1, i64 0 4846 %4 = extractelement <2 x double> %x1, i64 0 4847 %5 = extractelement <2 x double> %2, i64 0 4848 %6 = call double @llvm.fma.f64(double %3, double %4, double %5) 4849 %7 = extractelement <2 x double> %x2, i64 0 4850 %8 = insertelement <2 x double> %x2, double %6, i64 0 4851 %9 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x0 4852 %10 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2 4853 %11 = extractelement <2 x double> %9, i64 0 4854 %12 = extractelement <2 x double> %x1, i64 0 4855 %13 = extractelement <2 x double> %10, i64 0 4856 %14 = call double @llvm.fma.f64(double %11, double %12, double %13) 4857 %15 = extractelement <2 x double> %x2, i64 0 4858 %16 = bitcast i8 %x3 to <8 x i1> 4859 %17 = extractelement <8 x i1> %16, i64 0 4860 %18 = select i1 %17, double %14, double %15 4861 %19 = insertelement <2 x double> %x2, double %18, i64 0 4862 %20 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x0 4863 %21 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2 4864 %22 = extractelement <2 x double> %20, i64 0 4865 %23 = extractelement <2 x double> %x1, i64 0 4866 %24 = extractelement <2 x double> %21, i64 0 4867 %25 = call double @llvm.x86.avx512.vfmadd.f64(double %22, double %23, double %24, i32 3) 4868 %26 = extractelement <2 x double> %x2, i64 0 4869 %27 = insertelement <2 x double> %x2, double %25, i64 0 4870 %28 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x0 4871 %29 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2 4872 %30 = extractelement <2 x double> %28, i64 0 
4873 %31 = extractelement <2 x double> %x1, i64 0 4874 %32 = extractelement <2 x double> %29, i64 0 4875 %33 = call double @llvm.x86.avx512.vfmadd.f64(double %30, double %31, double %32, i32 3) 4876 %34 = extractelement <2 x double> %x2, i64 0 4877 %35 = bitcast i8 %x3 to <8 x i1> 4878 %36 = extractelement <8 x i1> %35, i64 0 4879 %37 = select i1 %36, double %33, double %34 4880 %38 = insertelement <2 x double> %x2, double %37, i64 0 4881 %res4 = fadd <2 x double> %8, %19 4882 %res5 = fadd <2 x double> %27, %38 4883 %res6 = fadd <2 x double> %res4, %res5 4884 ret <2 x double> %res6 4885 } 4886 4887 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ 4888 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss: 4889 ; CHECK: ## %bb.0: 4890 ; CHECK-NEXT: vmovaps %xmm2, %xmm3 4891 ; CHECK-NEXT: vfnmsub231ss {{.*#+}} xmm3 = -(xmm0 * xmm1) - xmm3 4892 ; CHECK-NEXT: kmovw %edi, %k1 4893 ; CHECK-NEXT: vmovaps %xmm2, %xmm4 4894 ; CHECK-NEXT: vfnmsub231ss {{.*#+}} xmm4 = -(xmm0 * xmm1) - xmm4 4895 ; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm3 4896 ; CHECK-NEXT: vmovaps %xmm2, %xmm4 4897 ; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 4898 ; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 4899 ; CHECK-NEXT: vaddps %xmm2, %xmm4, %xmm0 4900 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 4901 ; CHECK-NEXT: retq 4902 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0 4903 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2 4904 %3 = extractelement <4 x float> %1, i64 0 4905 %4 = extractelement <4 x float> %x1, i64 0 4906 %5 = extractelement <4 x float> %2, i64 0 4907 %6 = call float @llvm.fma.f32(float %3, float %4, float %5) 4908 %7 = extractelement <4 x float> %x2, i64 0 4909 %8 = insertelement <4 x float> %x2, float %6, i64 0 4910 %9 = fsub <4 x float> <float -0.000000e+00, 
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0 4911 %10 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2 4912 %11 = extractelement <4 x float> %9, i64 0 4913 %12 = extractelement <4 x float> %x1, i64 0 4914 %13 = extractelement <4 x float> %10, i64 0 4915 %14 = call float @llvm.fma.f32(float %11, float %12, float %13) 4916 %15 = extractelement <4 x float> %x2, i64 0 4917 %16 = bitcast i8 %x3 to <8 x i1> 4918 %17 = extractelement <8 x i1> %16, i64 0 4919 %18 = select i1 %17, float %14, float %15 4920 %19 = insertelement <4 x float> %x2, float %18, i64 0 4921 %20 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0 4922 %21 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2 4923 %22 = extractelement <4 x float> %20, i64 0 4924 %23 = extractelement <4 x float> %x1, i64 0 4925 %24 = extractelement <4 x float> %21, i64 0 4926 %25 = call float @llvm.x86.avx512.vfmadd.f32(float %22, float %23, float %24, i32 3) 4927 %26 = extractelement <4 x float> %x2, i64 0 4928 %27 = insertelement <4 x float> %x2, float %25, i64 0 4929 %28 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0 4930 %29 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2 4931 %30 = extractelement <4 x float> %28, i64 0 4932 %31 = extractelement <4 x float> %x1, i64 0 4933 %32 = extractelement <4 x float> %29, i64 0 4934 %33 = call float @llvm.x86.avx512.vfmadd.f32(float %30, float %31, float %32, i32 3) 4935 %34 = extractelement <4 x float> %x2, i64 0 4936 %35 = bitcast i8 %x3 to <8 x i1> 4937 %36 = extractelement <8 x i1> %35, i64 0 4938 %37 = select i1 %36, float %33, float %34 4939 %38 = insertelement <4 x float> %x2, float %37, i64 0 4940 %res4 = fadd <4 x float> %8, %19 4941 %res5 = fadd <4 x 
float> %27, %38 4942 %res6 = fadd <4 x float> %res4, %res5 4943 ret <4 x float> %res6 4944 } 4945 4946 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) { 4947 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm: 4948 ; CHECK: ## %bb.0: 4949 ; CHECK-NEXT: kmovw %esi, %k1 4950 ; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm1 = (xmm0 * mem) + xmm1 4951 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 4952 ; CHECK-NEXT: retq 4953 %q = load float, float* %ptr_b 4954 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 4955 %1 = extractelement <4 x float> %x0, i64 0 4956 %2 = extractelement <4 x float> %vecinit.i, i64 0 4957 %3 = extractelement <4 x float> %x1, i64 0 4958 %4 = call float @llvm.fma.f32(float %1, float %2, float %3) 4959 %5 = bitcast i8 %x3 to <8 x i1> 4960 %6 = extractelement <8 x i1> %5, i64 0 4961 %7 = select i1 %6, float %4, float %3 4962 %8 = insertelement <4 x float> %x1, float %7, i64 0 4963 ret <4 x float> %8 4964 } 4965 4966 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) { 4967 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm: 4968 ; CHECK: ## %bb.0: 4969 ; CHECK-NEXT: kmovw %esi, %k1 4970 ; CHECK-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 4971 ; CHECK-NEXT: retq 4972 %q = load float, float* %ptr_b 4973 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 4974 %1 = extractelement <4 x float> %x0, i64 0 4975 %2 = extractelement <4 x float> %vecinit.i, i64 0 4976 %3 = extractelement <4 x float> %x1, i64 0 4977 %4 = call float @llvm.fma.f32(float %1, float %2, float %3) 4978 %5 = bitcast i8 %x3 to <8 x i1> 4979 %6 = extractelement <8 x i1> %5, i64 0 4980 %7 = select i1 %6, float %4, float %1 4981 %8 = insertelement <4 x float> %x0, float %7, i64 0 4982 ret <4 x float> %8 4983 } 4984 4985 4986 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float 
*%ptr_b ,i8 %x3,i32 %x4) { 4987 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm: 4988 ; CHECK: ## %bb.0: 4989 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 4990 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 4991 ; CHECK-NEXT: retq 4992 %q = load float, float* %ptr_b 4993 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 4994 %1 = extractelement <4 x float> %x0, i64 0 4995 %2 = extractelement <4 x float> %x1, i64 0 4996 %3 = extractelement <4 x float> %vecinit.i, i64 0 4997 %4 = call float @llvm.fma.f32(float %1, float %2, float %3) 4998 %5 = select i1 false, float %4, float 0.000000e+00 4999 %6 = insertelement <4 x float> %x0, float %5, i64 0 5000 ret <4 x float> %6 5001 } 5002 5003 define <16 x i32> @test_x86_avx512_psll_d_512(<16 x i32> %a0, <4 x i32> %a1) { 5004 ; CHECK-LABEL: test_x86_avx512_psll_d_512: 5005 ; CHECK: ## %bb.0: 5006 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 5007 ; CHECK-NEXT: retq 5008 %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] 5009 ret <16 x i32> %res 5010 } 5011 define <16 x i32> @test_x86_avx512_mask_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) { 5012 ; CHECK-LABEL: test_x86_avx512_mask_psll_d_512: 5013 ; CHECK: ## %bb.0: 5014 ; CHECK-NEXT: kmovw %edi, %k1 5015 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1} 5016 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 5017 ; CHECK-NEXT: retq 5018 %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] 5019 %mask.cast = bitcast i16 %mask to <16 x i1> 5020 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru 5021 ret <16 x i32> %res2 5022 } 5023 define <16 x i32> @test_x86_avx512_maskz_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 5024 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d_512: 5025 ; CHECK: ## %bb.0: 5026 ; CHECK-NEXT: kmovw %edi, %k1 5027 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} 5028 ; CHECK-NEXT: 
retq 5029 %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] 5030 %mask.cast = bitcast i16 %mask to <16 x i1> 5031 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer 5032 ret <16 x i32> %res2 5033 } 5034 declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) nounwind readnone 5035 5036 5037 define <8 x i64> @test_x86_avx512_psll_q_512(<8 x i64> %a0, <2 x i64> %a1) { 5038 ; CHECK-LABEL: test_x86_avx512_psll_q_512: 5039 ; CHECK: ## %bb.0: 5040 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 5041 ; CHECK-NEXT: retq 5042 %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] 5043 ret <8 x i64> %res 5044 } 5045 define <8 x i64> @test_x86_avx512_mask_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) { 5046 ; CHECK-LABEL: test_x86_avx512_mask_psll_q_512: 5047 ; CHECK: ## %bb.0: 5048 ; CHECK-NEXT: kmovw %edi, %k1 5049 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1} 5050 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 5051 ; CHECK-NEXT: retq 5052 %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] 5053 %mask.cast = bitcast i8 %mask to <8 x i1> 5054 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru 5055 ret <8 x i64> %res2 5056 } 5057 define <8 x i64> @test_x86_avx512_maskz_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 5058 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q_512: 5059 ; CHECK: ## %bb.0: 5060 ; CHECK-NEXT: kmovw %edi, %k1 5061 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} 5062 ; CHECK-NEXT: retq 5063 %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] 5064 %mask.cast = bitcast i8 %mask to <8 x i1> 5065 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer 5066 ret <8 x i64> %res2 5067 } 5068 declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) 
nounwind readnone 5069 5070 5071 define <16 x i32> @test_x86_avx512_pslli_d_512(<16 x i32> %a0) { 5072 ; CHECK-LABEL: test_x86_avx512_pslli_d_512: 5073 ; CHECK: ## %bb.0: 5074 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 5075 ; CHECK-NEXT: retq 5076 %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] 5077 ret <16 x i32> %res 5078 } 5079 define <16 x i32> @test_x86_avx512_mask_pslli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { 5080 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d_512: 5081 ; CHECK: ## %bb.0: 5082 ; CHECK-NEXT: kmovw %edi, %k1 5083 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm1 {%k1} 5084 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 5085 ; CHECK-NEXT: retq 5086 %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] 5087 %mask.cast = bitcast i16 %mask to <16 x i1> 5088 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru 5089 ret <16 x i32> %res2 5090 } 5091 define <16 x i32> @test_x86_avx512_maskz_pslli_d_512(<16 x i32> %a0, i16 %mask) { 5092 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d_512: 5093 ; CHECK: ## %bb.0: 5094 ; CHECK-NEXT: kmovw %edi, %k1 5095 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z} 5096 ; CHECK-NEXT: retq 5097 %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] 5098 %mask.cast = bitcast i16 %mask to <16 x i1> 5099 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer 5100 ret <16 x i32> %res2 5101 } 5102 declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) nounwind readnone 5103 5104 5105 define <8 x i64> @test_x86_avx512_pslli_q_512(<8 x i64> %a0) { 5106 ; CHECK-LABEL: test_x86_avx512_pslli_q_512: 5107 ; CHECK: ## %bb.0: 5108 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 5109 ; CHECK-NEXT: retq 5110 %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] 5111 ret <8 x i64> %res 5112 } 5113 define <8 x i64> 
@test_x86_avx512_mask_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { 5114 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q_512: 5115 ; CHECK: ## %bb.0: 5116 ; CHECK-NEXT: kmovw %edi, %k1 5117 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1} 5118 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 5119 ; CHECK-NEXT: retq 5120 %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] 5121 %mask.cast = bitcast i8 %mask to <8 x i1> 5122 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru 5123 ret <8 x i64> %res2 5124 } 5125 define <8 x i64> @test_x86_avx512_maskz_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { 5126 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q_512: 5127 ; CHECK: ## %bb.0: 5128 ; CHECK-NEXT: kmovw %edi, %k1 5129 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z} 5130 ; CHECK-NEXT: retq 5131 %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] 5132 %mask.cast = bitcast i8 %mask to <8 x i1> 5133 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer 5134 ret <8 x i64> %res2 5135 } 5136 declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) nounwind readnone 5137 5138 5139 define <8 x i64> @test_x86_avx512_psra_q_512(<8 x i64> %a0, <2 x i64> %a1) { 5140 ; CHECK-LABEL: test_x86_avx512_psra_q_512: 5141 ; CHECK: ## %bb.0: 5142 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 5143 ; CHECK-NEXT: retq 5144 %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] 5145 ret <8 x i64> %res 5146 } 5147 define <8 x i64> @test_x86_avx512_mask_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) { 5148 ; CHECK-LABEL: test_x86_avx512_mask_psra_q_512: 5149 ; CHECK: ## %bb.0: 5150 ; CHECK-NEXT: kmovw %edi, %k1 5151 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1} 5152 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 5153 ; CHECK-NEXT: retq 5154 %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x 
i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] 5155 %mask.cast = bitcast i8 %mask to <8 x i1> 5156 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru 5157 ret <8 x i64> %res2 5158 } 5159 define <8 x i64> @test_x86_avx512_maskz_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 5160 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q_512: 5161 ; CHECK: ## %bb.0: 5162 ; CHECK-NEXT: kmovw %edi, %k1 5163 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} 5164 ; CHECK-NEXT: retq 5165 %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] 5166 %mask.cast = bitcast i8 %mask to <8 x i1> 5167 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer 5168 ret <8 x i64> %res2 5169 } 5170 declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) nounwind readnone 5171 5172 5173 define <16 x i32> @test_x86_avx512_psra_d_512(<16 x i32> %a0, <4 x i32> %a1) { 5174 ; CHECK-LABEL: test_x86_avx512_psra_d_512: 5175 ; CHECK: ## %bb.0: 5176 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 5177 ; CHECK-NEXT: retq 5178 %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] 5179 ret <16 x i32> %res 5180 } 5181 define <16 x i32> @test_x86_avx512_mask_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) { 5182 ; CHECK-LABEL: test_x86_avx512_mask_psra_d_512: 5183 ; CHECK: ## %bb.0: 5184 ; CHECK-NEXT: kmovw %edi, %k1 5185 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1} 5186 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 5187 ; CHECK-NEXT: retq 5188 %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] 5189 %mask.cast = bitcast i16 %mask to <16 x i1> 5190 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru 5191 ret <16 x i32> %res2 5192 } 5193 define <16 x i32> @test_x86_avx512_maskz_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 5194 ; CHECK-LABEL: 
test_x86_avx512_maskz_psra_d_512: 5195 ; CHECK: ## %bb.0: 5196 ; CHECK-NEXT: kmovw %edi, %k1 5197 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} 5198 ; CHECK-NEXT: retq 5199 %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] 5200 %mask.cast = bitcast i16 %mask to <16 x i1> 5201 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer 5202 ret <16 x i32> %res2 5203 } 5204 declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) nounwind readnone 5205 5206 5207 5208 define <8 x i64> @test_x86_avx512_psrai_q_512(<8 x i64> %a0) { 5209 ; CHECK-LABEL: test_x86_avx512_psrai_q_512: 5210 ; CHECK: ## %bb.0: 5211 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 5212 ; CHECK-NEXT: retq 5213 %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] 5214 ret <8 x i64> %res 5215 } 5216 define <8 x i64> @test_x86_avx512_mask_psrai_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { 5217 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q_512: 5218 ; CHECK: ## %bb.0: 5219 ; CHECK-NEXT: kmovw %edi, %k1 5220 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1} 5221 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 5222 ; CHECK-NEXT: retq 5223 %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] 5224 %mask.cast = bitcast i8 %mask to <8 x i1> 5225 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru 5226 ret <8 x i64> %res2 5227 } 5228 define <8 x i64> @test_x86_avx512_maskz_psrai_q_512(<8 x i64> %a0, i8 %mask) { 5229 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q_512: 5230 ; CHECK: ## %bb.0: 5231 ; CHECK-NEXT: kmovw %edi, %k1 5232 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z} 5233 ; CHECK-NEXT: retq 5234 %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] 5235 %mask.cast = bitcast i8 %mask to <8 x i1> 5236 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer 
5237 ret <8 x i64> %res2 5238 } 5239 declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) nounwind readnone 5240 5241 5242 define <16 x i32> @test_x86_avx512_psrai_d_512(<16 x i32> %a0) { 5243 ; CHECK-LABEL: test_x86_avx512_psrai_d_512: 5244 ; CHECK: ## %bb.0: 5245 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 5246 ; CHECK-NEXT: retq 5247 %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] 5248 ret <16 x i32> %res 5249 } 5250 define <16 x i32> @test_x86_avx512_mask_psrai_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { 5251 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d_512: 5252 ; CHECK: ## %bb.0: 5253 ; CHECK-NEXT: kmovw %edi, %k1 5254 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1} 5255 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 5256 ; CHECK-NEXT: retq 5257 %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] 5258 %mask.cast = bitcast i16 %mask to <16 x i1> 5259 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru 5260 ret <16 x i32> %res2 5261 } 5262 define <16 x i32> @test_x86_avx512_maskz_psrai_d_512(<16 x i32> %a0, i16 %mask) { 5263 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d_512: 5264 ; CHECK: ## %bb.0: 5265 ; CHECK-NEXT: kmovw %edi, %k1 5266 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z} 5267 ; CHECK-NEXT: retq 5268 %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] 5269 %mask.cast = bitcast i16 %mask to <16 x i1> 5270 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer 5271 ret <16 x i32> %res2 5272 } 5273 declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) nounwind readnone 5274 5275 5276 5277 define <16 x i32> @test_x86_avx512_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1) { 5278 ; CHECK-LABEL: test_x86_avx512_psrl_d_512: 5279 ; CHECK: ## %bb.0: 5280 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 5281 ; CHECK-NEXT: retq 5282 %res = call <16 x i32> 
@llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] 5283 ret <16 x i32> %res 5284 } 5285 define <16 x i32> @test_x86_avx512_mask_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) { 5286 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d_512: 5287 ; CHECK: ## %bb.0: 5288 ; CHECK-NEXT: kmovw %edi, %k1 5289 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1} 5290 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 5291 ; CHECK-NEXT: retq 5292 %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] 5293 %mask.cast = bitcast i16 %mask to <16 x i1> 5294 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru 5295 ret <16 x i32> %res2 5296 } 5297 define <16 x i32> @test_x86_avx512_maskz_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 5298 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d_512: 5299 ; CHECK: ## %bb.0: 5300 ; CHECK-NEXT: kmovw %edi, %k1 5301 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} 5302 ; CHECK-NEXT: retq 5303 %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] 5304 %mask.cast = bitcast i16 %mask to <16 x i1> 5305 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer 5306 ret <16 x i32> %res2 5307 } 5308 declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) nounwind readnone 5309 5310 5311 define <8 x i64> @test_x86_avx512_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1) { 5312 ; CHECK-LABEL: test_x86_avx512_psrl_q_512: 5313 ; CHECK: ## %bb.0: 5314 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 5315 ; CHECK-NEXT: retq 5316 %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] 5317 ret <8 x i64> %res 5318 } 5319 define <8 x i64> @test_x86_avx512_mask_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) { 5320 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q_512: 5321 ; CHECK: ## %bb.0: 5322 ; CHECK-NEXT: 
kmovw %edi, %k1 5323 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} 5324 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 5325 ; CHECK-NEXT: retq 5326 %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] 5327 %mask.cast = bitcast i8 %mask to <8 x i1> 5328 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru 5329 ret <8 x i64> %res2 5330 } 5331 define <8 x i64> @test_x86_avx512_maskz_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 5332 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q_512: 5333 ; CHECK: ## %bb.0: 5334 ; CHECK-NEXT: kmovw %edi, %k1 5335 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} 5336 ; CHECK-NEXT: retq 5337 %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] 5338 %mask.cast = bitcast i8 %mask to <8 x i1> 5339 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer 5340 ret <8 x i64> %res2 5341 } 5342 declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) nounwind readnone 5343 5344 5345 define <16 x i32> @test_x86_avx512_psrli_d_512(<16 x i32> %a0) { 5346 ; CHECK-LABEL: test_x86_avx512_psrli_d_512: 5347 ; CHECK: ## %bb.0: 5348 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 5349 ; CHECK-NEXT: retq 5350 %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] 5351 ret <16 x i32> %res 5352 } 5353 define <16 x i32> @test_x86_avx512_mask_psrli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { 5354 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d_512: 5355 ; CHECK: ## %bb.0: 5356 ; CHECK-NEXT: kmovw %edi, %k1 5357 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1} 5358 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 5359 ; CHECK-NEXT: retq 5360 %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] 5361 %mask.cast = bitcast i16 %mask to <16 x i1> 5362 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru 5363 ret <16 x i32> 
%res2
}
; Zero-masked immediate right-shift (dword): the select of the intrinsic
; result against zeroinitializer must fold into the {%k1} {z} form of vpsrld.
define <16 x i32> @test_x86_avx512_maskz_psrli_d_512(<16 x i32> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrli_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) nounwind readnone


; VPSRLQ by immediate: unmasked, merge-masked (select with passthru), and
; zero-masked (select with zeroinitializer) variants.
define <8 x i64> @test_x86_avx512_psrli_q_512(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_psrli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrli_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
  ret <8 x i64> %res2
}
define <8 x i64> @test_x86_avx512_maskz_psrli_q_512(<8 x i64> %a0, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrli_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) nounwind readnone

; Variable left shift (VPSLLVD), dword elements: unmasked / merge-masked /
; zero-masked.
define <16 x i32> @test_x86_avx512_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psllv_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
  ret <16 x i32> %res2
}

define <16 x i32> @test_x86_avx512_maskz_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psllv_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) nounwind readnone

; Variable left shift (VPSLLVQ), qword elements.
define <8 x i64> @test_x86_avx512_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psllv_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
  ret <8 x i64> %res2
}

define <8 x i64> @test_x86_avx512_maskz_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psllv_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) nounwind readnone

; Variable arithmetic right shift (VPSRAVD), dword elements.
define <16 x i32> @test_x86_avx512_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsravd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrav_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsravd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
  ret <16 x i32> %res2
}

define <16 x i32> @test_x86_avx512_maskz_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrav_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) nounwind readnone

; Variable arithmetic right shift (VPSRAVQ), qword elements.
define <8 x i64> @test_x86_avx512_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsravq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrav_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsravq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
  ret <8 x i64> %res2
}

define <8 x i64> @test_x86_avx512_maskz_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrav_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) nounwind readnone

; Variable logical right shift (VPSRLVD), dword elements.
define <16 x i32> @test_x86_avx512_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrlv_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
  ret <16 x i32> %res2
}

define <16 x i32> @test_x86_avx512_maskz_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) nounwind readnone

; Variable logical right shift (VPSRLVQ), qword elements.
define <8 x i64> @test_x86_avx512_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrlv_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
  ret <8 x i64> %res2
}

define <8 x i64> @test_x86_avx512_maskz_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) nounwind readnone

; An i8 mask produced by an <8 x i1> compare is zero-extended to i16 and then
; used as a <16 x i1> mask: the k-register value must round-trip through a
; GPR (movzbl) rather than be used directly, since the upper 8 bits matter.
define <16 x float> @bad_mask_transition(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) {
; CHECK-LABEL: bad_mask_transition:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmplt_oqpd %zmm1, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    vcmplt_oqpd %zmm3, %zmm2, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    kmovw %eax, %k0
; CHECK-NEXT:    kmovw %ecx, %k1
; CHECK-NEXT:    kunpckbw %k0, %k1, %k1
; CHECK-NEXT:    vblendmps %zmm5, %zmm4, %zmm0 {%k1}
; CHECK-NEXT:    retq
entry:
  %0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
  %1 = bitcast <8 x i1> %0 to i8
  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, i32 4)
  %3 = bitcast <8 x i1> %2 to i8
  %conv = zext i8 %1 to i16
  %conv2 = zext i8 %3 to i16
  %4 = bitcast i16 %conv to <16 x i1>
  %5 = bitcast i16 %conv2 to <16 x i1>
  %6 = shufflevector <16 x i1> %4, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %7 = shufflevector <16 x i1> %5, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %8 = shufflevector <8 x i1> %6, <8 x i1> %7, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %9 = select <16 x i1> %8, <16 x float> %f, <16 x float> %e
  ret <16 x float> %9
}

; Same i8 -> i16 mask-widening hazard with a single compare: the low 8 mask
; bits must be zero-extended through a GPR before the 16-lane select.
define <16 x float> @bad_mask_transition_2(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) {
; CHECK-LABEL: bad_mask_transition_2:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmplt_oqpd %zmm1, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vblendmps %zmm5, %zmm4, %zmm0 {%k1}
; CHECK-NEXT:    retq
entry:
  %0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
  %1 = bitcast <8 x i1> %0 to i8
  %conv = zext i8 %1 to i16
  %2 = bitcast i16 %conv to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %f, <16 x float> %e
  ret <16 x float> %3
}