; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL

;
; vXf32 (accum)
;

define float @test_v2f32(float %a0, <2 x float> %a1) {
; SSE2-LABEL: test_v2f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    mulps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; AVX512-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float %a0, <2 x float> %a1)
  ret float %1
}

define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE2-LABEL: test_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm1, %xmm2
; SSE2-NEXT:    movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT:    mulps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
; SSE2-NEXT:    mulps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm1, %xmm2
; SSE41-NEXT:    movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT:    mulps %xmm1, %xmm2
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    mulps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX512-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float %a0, <4 x float> %a1)
  ret float %1
}

define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2-LABEL: test_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    mulps %xmm2, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm2
; SSE2-NEXT:    movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT:    mulps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
; SSE2-NEXT:    mulps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    mulps %xmm2, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm2
; SSE41-NEXT:    movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT:    mulps %xmm1, %xmm2
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    mulps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX-NEXT:    vmulps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX512-NEXT:    vmulps %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float %a0, <8 x float> %a1)
  ret float %1
}

define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-LABEL: test_v16f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    mulps %xmm4, %xmm2
; SSE2-NEXT:    mulps %xmm3, %xmm1
; SSE2-NEXT:    mulps %xmm2, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm2
; SSE2-NEXT:    movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT:    mulps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
; SSE2-NEXT:    mulps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    mulps %xmm4, %xmm2
; SSE41-NEXT:    mulps %xmm3, %xmm1
; SSE41-NEXT:    mulps %xmm2, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm2
; SSE41-NEXT:    movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT:    mulps %xmm1, %xmm2
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    mulps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %ymm2, %ymm1, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm1, %ymm0
; AVX512-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float %a0, <16 x float> %a1)
  ret float %1
}

;
; vXf32 (one)
;

define float @test_v2f32_zero(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32_zero:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    mulps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2f32_zero:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    mulps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2f32_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f32_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float 1.0, <2 x float> %a0)
  ret float %1
}

define float @test_v4f32_zero(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_zero:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    mulps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4f32_zero:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4f32_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float 1.0, <4 x float> %a0)
  ret float %1
}

define float @test_v8f32_zero(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32_zero:
; SSE2:       # %bb.0:
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    mulps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8f32_zero:
; SSE41:       # %bb.0:
; SSE41-NEXT:    mulps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8f32_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float 1.0, <8 x float> %a0)
  ret float %1
}

define float @test_v16f32_zero(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32_zero:
; SSE2:       # %bb.0:
; SSE2-NEXT:    mulps %xmm3, %xmm1
; SSE2-NEXT:    mulps %xmm2, %xmm0
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    mulps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16f32_zero:
; SSE41:       # %bb.0:
; SSE41-NEXT:    mulps %xmm3, %xmm1
; SSE41-NEXT:    mulps %xmm2, %xmm0
; SSE41-NEXT:    mulps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16f32_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float 1.0, <16 x float> %a0)
  ret float %1
}

;
; vXf32 (undef)
;

define float @test_v2f32_undef(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    mulps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2f32_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    mulps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2f32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float undef, <2 x float> %a0)
  ret float %1
}

define float @test_v4f32_undef(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    mulps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4f32_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4f32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float undef, <4 x float> %a0)
  ret float %1
}

define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    mulps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8f32_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    mulps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8f32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float undef, <8 x float> %a0)
  ret float %1
}

define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    mulps %xmm3, %xmm1
; SSE2-NEXT:    mulps %xmm2, %xmm0
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    mulps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE2-NEXT:    mulps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16f32_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    mulps %xmm3, %xmm1
; SSE41-NEXT:    mulps %xmm2, %xmm0
; SSE41-NEXT:    mulps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    mulps %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16f32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float undef, <16 x float> %a0)
  ret float %1
}

;
; vXf64 (accum)
;

define double @test_v2f64(double %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT:    vmulpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX512-NEXT:    vmulpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double %a0, <2 x double> %a1)
  ret double %1
}

define double @test_v4f64(double %a0, <4 x double> %a1) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm2, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX-NEXT:    vmulpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX512-NEXT:    vmulpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double %a0, <4 x double> %a1)
  ret double %1
}

define double @test_v8f64(double %a0, <8 x double> %a1) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm4, %xmm2
; SSE-NEXT:    mulpd %xmm3, %xmm1
; SSE-NEXT:    mulpd %xmm2, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %ymm2, %ymm1, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm1, %ymm0
; AVX512-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double %a0, <8 x double> %a1)
  ret double %1
}

define double @test_v16f64(double %a0, <16 x double> %a1) {
; SSE-LABEL: test_v16f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm6, %xmm2
; SSE-NEXT:    mulpd %xmm7, %xmm3
; SSE-NEXT:    mulpd %xmm5, %xmm1
; SSE-NEXT:    mulpd %xmm3, %xmm1
; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT:    mulpd %xmm2, %xmm4
; SSE-NEXT:    mulpd %xmm1, %xmm4
; SSE-NEXT:    movapd %xmm4, %xmm0
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1]
; SSE-NEXT:    mulpd %xmm4, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %ymm4, %ymm2, %ymm0
; AVX-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vmulpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulpd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double %a0, <16 x double> %a1)
  ret double %1
}

;
; vXf64 (one)
;

define double @test_v2f64_zero(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    mulpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double 1.0, <2 x double> %a0)
  ret double %1
}

define double @test_v4f64_zero(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    mulpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double 1.0, <4 x double> %a0)
  ret double %1
}

define double @test_v8f64_zero(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm3, %xmm1
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    mulpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f64_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double 1.0, <8 x double> %a0)
  ret double %1
}

define double @test_v16f64_zero(<16 x double> %a0) {
; SSE-LABEL: test_v16f64_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm6, %xmm2
; SSE-NEXT:    mulpd %xmm4, %xmm0
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    mulpd %xmm7, %xmm3
; SSE-NEXT:    mulpd %xmm5, %xmm1
; SSE-NEXT:    mulpd %xmm3, %xmm1
; SSE-NEXT:    mulpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f64_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f64_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double 1.0, <16 x double> %a0)
  ret double %1
}

;
; vXf64 (undef)
;

define double @test_v2f64_undef(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    mulpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double undef, <2 x double> %a0)
  ret double %1
}

define double @test_v4f64_undef(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    mulpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double undef, <4 x double> %a0)
  ret double %1
}

define double @test_v8f64_undef(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm3, %xmm1
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    mulpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double undef, <8 x double> %a0)
  ret double %1
}

define double @test_v16f64_undef(<16 x double> %a0) {
; SSE-LABEL: test_v16f64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm6, %xmm2
; SSE-NEXT:    mulpd %xmm4, %xmm0
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    mulpd %xmm7, %xmm3
; SSE-NEXT:    mulpd %xmm5, %xmm1
; SSE-NEXT:    mulpd %xmm3, %xmm1
; SSE-NEXT:    mulpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double undef, <16 x double> %a0)
  ret double %1
}

declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float, <2 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float, <8 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float, <16 x float>)

declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double, <2 x double>)
declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double, <4 x double>)
declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double, <8 x double>)
declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double, <16 x double>)