; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512F-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512VL-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq512:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512F-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512F-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512VL-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq256:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512BW-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512BW-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512BW-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq256:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq256:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT:    retq
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512F-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512F-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512F-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq128:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512VL-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512VL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq128:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512BW-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq128:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq128:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT:    retq
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; AVX512F-LABEL: orq_broadcast:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: orq_broadcast:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: orq_broadcast:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: orq_broadcast:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: orq_broadcast:
; SKX:       # %bb.0:
; SKX-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; AVX512F-LABEL: andd512fold:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: andd512fold:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: andd512fold:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: andd512fold:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vandps (%rdi), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: andd512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandps (%rdi), %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; AVX512F-LABEL: andqbrst:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: andqbrst:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: andqbrst:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: andqbrst:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: andqbrst:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm3, %zmm3, %k1
; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                     <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double>* %j, <8 x i64> %mask1)
                                     nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                     <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}

define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
                                     double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}

define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                     <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq

  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float>%res
}

define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; AVX512F-LABEL: test_fxor_8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
; AVX512F-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor_8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor_8f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
; AVX512BW-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor_8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
; AVX512DQ-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor_8f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT:    retq
  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float>%res
}

define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
;
AVX512VL-LABEL: fabs_v8f64: 1008 ; AVX512VL: # %bb.0: 1009 ; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0 1010 ; AVX512VL-NEXT: retq 1011 ; 1012 ; AVX512BW-LABEL: fabs_v8f64: 1013 ; AVX512BW: # %bb.0: 1014 ; AVX512BW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0 1015 ; AVX512BW-NEXT: retq 1016 ; 1017 ; AVX512DQ-LABEL: fabs_v8f64: 1018 ; AVX512DQ: # %bb.0: 1019 ; AVX512DQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 1020 ; AVX512DQ-NEXT: retq 1021 ; 1022 ; SKX-LABEL: fabs_v8f64: 1023 ; SKX: # %bb.0: 1024 ; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 1025 ; SKX-NEXT: retq 1026 { 1027 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) 1028 ret <8 x double> %t 1029 } 1030 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) 1031 1032 define <16 x float> @fabs_v16f32(<16 x float> %p) 1033 ; AVX512F-LABEL: fabs_v16f32: 1034 ; AVX512F: # %bb.0: 1035 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 1036 ; AVX512F-NEXT: retq 1037 ; 1038 ; AVX512VL-LABEL: fabs_v16f32: 1039 ; AVX512VL: # %bb.0: 1040 ; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 1041 ; AVX512VL-NEXT: retq 1042 ; 1043 ; AVX512BW-LABEL: fabs_v16f32: 1044 ; AVX512BW: # %bb.0: 1045 ; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 1046 ; AVX512BW-NEXT: retq 1047 ; 1048 ; AVX512DQ-LABEL: fabs_v16f32: 1049 ; AVX512DQ: # %bb.0: 1050 ; AVX512DQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 1051 ; AVX512DQ-NEXT: retq 1052 ; 1053 ; SKX-LABEL: fabs_v16f32: 1054 ; SKX: # %bb.0: 1055 ; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 1056 ; SKX-NEXT: retq 1057 { 1058 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1059 ret <16 x float> %t 1060 } 1061 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1062