; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE(review): do not hand-edit the CHECK lines below; regenerate them with
; utils/update_llc_test_checks.py after any IR or codegen change.
; Each RUN line compiles this same IR with a different AVX-512 feature set;
; the --check-prefix selects which autogenerated assertions apply to that run.
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl| FileCheck --check-prefix=CHECK --check-prefix=SKX %s

; 512-bit floating-point add: register-register and constant-pool-folded forms.
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

; 512-bit floating-point subtract: register-register and load-folded forms.
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

; i64 vector multiply at 512/256/128 bits: without AVX512DQ it is expanded
; into a vpmuludq/shift/add sequence; with AVX512DQ (+VL for the narrower
; widths) it becomes a single vpmullq, as the per-prefix checks show.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT: vpsllq $32, %zmm3, %zmm3
; AVX512F-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm1
; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vpsllq $32, %zmm0, %zmm0
; AVX512F-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT: vpsllq $32, %zmm3, %zmm3
; AVX512VL-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm1
; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT: vpsllq $32, %zmm0, %zmm0
; AVX512VL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT: vpsllq $32, %zmm3, %zmm3
; AVX512BW-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm1
; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpsllq $32, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq512:
; SKX: ## BB#0:
; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmuludq %ymm0, %ymm1, %ymm2
; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm3
; AVX512F-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
; AVX512F-NEXT: vpsllq $32, %ymm3, %ymm3
; AVX512F-NEXT: vpaddq %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX512F-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX512F-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq256:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmuludq %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm3
; AVX512VL-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT: vpsllq $32, %ymm3, %ymm3
; AVX512VL-NEXT: vpaddq %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX512VL-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq256:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpmuludq %ymm0, %ymm1, %ymm2
; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3
; AVX512BW-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
; AVX512BW-NEXT: vpsllq $32, %ymm3, %ymm3
; AVX512BW-NEXT: vpaddq %ymm3, %ymm2, %ymm2
; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX512BW-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX512BW-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq256:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpmuludq %ymm0, %ymm1, %ymm2
; AVX512DQ-NEXT: vpsrlq $32, %ymm0, %ymm3
; AVX512DQ-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
; AVX512DQ-NEXT: vpsllq $32, %ymm3, %ymm3
; AVX512DQ-NEXT: vpaddq %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX512DQ-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq256:
; SKX: ## BB#0:
; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT: retq
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmuludq %xmm0, %xmm1, %xmm2
; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm3
; AVX512F-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
; AVX512F-NEXT: vpsllq $32, %xmm3, %xmm3
; AVX512F-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX512F-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX512F-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq128:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmuludq %xmm0, %xmm1, %xmm2
; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3
; AVX512VL-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
; AVX512VL-NEXT: vpsllq $32, %xmm3, %xmm3
; AVX512VL-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX512VL-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX512VL-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq128:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpmuludq %xmm0, %xmm1, %xmm2
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm3
; AVX512BW-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT: vpsllq $32, %xmm3, %xmm3
; AVX512BW-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX512BW-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq128:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpmuludq %xmm0, %xmm1, %xmm2
; AVX512DQ-NEXT: vpsrlq $32, %xmm0, %xmm3
; AVX512DQ-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
; AVX512DQ-NEXT: vpsllq $32, %xmm3, %xmm3
; AVX512DQ-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX512DQ-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
; AVX512DQ-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX512DQ-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq128:
; SKX: ## BB#0:
; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT: retq
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

; 512-bit floating-point multiply and divide, register and folded forms.
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

; Integer add/sub/mul: plain, load-folded, and embedded-broadcast ({1toN})
; forms, plus masked ({%k1}) and zero-masked ({%k1} {z}) variants below.
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

; Scalar and packed square root, both via libm calls and llvm.sqrt intrinsics.
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

; Masked floating-point arithmetic: the icmp-ne against zero materializes a
; %k1 predicate; update_llc_test_checks places the CHECK block between the
; split lines of the define, which is why these signatures span the checks.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <8 x double>* %j, <8 x i64> %mask1)
                                     nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
                                      <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}

define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
                                      double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}

define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
                                      <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; fsub from -0.0 is lowered as an xor of the sign bits; the instruction chosen
; (vpxord vs. vxorps) depends on the available feature set, per the prefixes.
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor:
; SKX: ## BB#0:
; SKX-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT: retq

  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float>%res
}

define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; CHECK-LABEL: test_fxor_8f32:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float>%res
}

; NOTE(review): this final function is truncated in the visible chunk; its
; body continues beyond this excerpt and is reproduced only up to the cut.
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v8f64:
; SKX: ## BB#0:
; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT: retq
{
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) 991 ret <8 x double> %t 992 } 993 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) 994 995 define <16 x float> @fabs_v16f32(<16 x float> %p) 996 ; AVX512F-LABEL: fabs_v16f32: 997 ; AVX512F: ## BB#0: 998 ; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 999 ; AVX512F-NEXT: retq 1000 ; 1001 ; AVX512VL-LABEL: fabs_v16f32: 1002 ; AVX512VL: ## BB#0: 1003 ; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 1004 ; AVX512VL-NEXT: retq 1005 ; 1006 ; AVX512BW-LABEL: fabs_v16f32: 1007 ; AVX512BW: ## BB#0: 1008 ; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 1009 ; AVX512BW-NEXT: retq 1010 ; 1011 ; AVX512DQ-LABEL: fabs_v16f32: 1012 ; AVX512DQ: ## BB#0: 1013 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 1014 ; AVX512DQ-NEXT: retq 1015 ; 1016 ; SKX-LABEL: fabs_v16f32: 1017 ; SKX: ## BB#0: 1018 ; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 1019 ; SKX-NEXT: retq 1020 { 1021 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1022 ret <16 x float> %t 1023 } 1024 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1025