1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL 8 9 ; 10 ; vXf32 (accum) 11 ; 12 13 define float @test_v2f32(float %a0, <2 x float> %a1) { 14 ; SSE2-LABEL: test_v2f32: 15 ; SSE2: # %bb.0: 16 ; SSE2-NEXT: mulss %xmm1, %xmm0 17 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] 18 ; SSE2-NEXT: mulss %xmm1, %xmm0 19 ; SSE2-NEXT: retq 20 ; 21 ; SSE41-LABEL: test_v2f32: 22 ; SSE41: # %bb.0: 23 ; SSE41-NEXT: mulss %xmm1, %xmm0 24 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm1[1,1,3,3] 25 ; SSE41-NEXT: mulss %xmm1, %xmm0 26 ; SSE41-NEXT: retq 27 ; 28 ; AVX-LABEL: test_v2f32: 29 ; AVX: # %bb.0: 30 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 31 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3] 32 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 33 ; AVX-NEXT: retq 34 ; 35 ; AVX512-LABEL: test_v2f32: 36 ; AVX512: # %bb.0: 37 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 38 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3] 39 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 40 ; AVX512-NEXT: retq 41 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float %a0, <2 x float> %a1) 42 ret float %1 43 } 44 45 define float @test_v4f32(float %a0, <4 x float> %a1) { 46 ; SSE2-LABEL: test_v4f32: 47 ; SSE2: # %bb.0: 48 ; SSE2-NEXT: mulss %xmm1, %xmm0 49 ; SSE2-NEXT: movaps %xmm1, %xmm2 50 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] 51 ; SSE2-NEXT: mulss %xmm2, %xmm0 52 ; SSE2-NEXT: movaps %xmm1, %xmm2 53 ; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] 54 ; SSE2-NEXT: mulss %xmm2, %xmm0 55 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 56 ; SSE2-NEXT: mulss %xmm1, %xmm0 57 ; SSE2-NEXT: retq 58 ; 59 ; SSE41-LABEL: test_v4f32: 60 ; SSE41: # %bb.0: 61 ; SSE41-NEXT: mulss %xmm1, %xmm0 62 ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 63 ; SSE41-NEXT: mulss %xmm2, %xmm0 64 ; SSE41-NEXT: movaps %xmm1, %xmm2 65 ; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] 66 ; SSE41-NEXT: mulss %xmm2, %xmm0 67 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 68 ; SSE41-NEXT: mulss %xmm1, %xmm0 69 ; SSE41-NEXT: retq 70 ; 71 ; AVX-LABEL: test_v4f32: 72 ; AVX: # %bb.0: 73 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 74 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 75 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 76 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 77 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 78 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 79 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 80 ; AVX-NEXT: retq 81 ; 82 ; AVX512-LABEL: test_v4f32: 83 ; AVX512: # %bb.0: 84 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 85 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 86 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 87 ; 
AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 88 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 89 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 90 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 91 ; AVX512-NEXT: retq 92 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float %a0, <4 x float> %a1) 93 ret float %1 94 } 95 96 define float @test_v8f32(float %a0, <8 x float> %a1) { 97 ; SSE2-LABEL: test_v8f32: 98 ; SSE2: # %bb.0: 99 ; SSE2-NEXT: mulss %xmm1, %xmm0 100 ; SSE2-NEXT: movaps %xmm1, %xmm3 101 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3] 102 ; SSE2-NEXT: mulss %xmm3, %xmm0 103 ; SSE2-NEXT: movaps %xmm1, %xmm3 104 ; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] 105 ; SSE2-NEXT: mulss %xmm3, %xmm0 106 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 107 ; SSE2-NEXT: mulss %xmm1, %xmm0 108 ; SSE2-NEXT: mulss %xmm2, %xmm0 109 ; SSE2-NEXT: movaps %xmm2, %xmm1 110 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] 111 ; SSE2-NEXT: mulss %xmm1, %xmm0 112 ; SSE2-NEXT: movaps %xmm2, %xmm1 113 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] 114 ; SSE2-NEXT: mulss %xmm1, %xmm0 115 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] 116 ; SSE2-NEXT: mulss %xmm2, %xmm0 117 ; SSE2-NEXT: retq 118 ; 119 ; SSE41-LABEL: test_v8f32: 120 ; SSE41: # %bb.0: 121 ; SSE41-NEXT: mulss %xmm1, %xmm0 122 ; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] 123 ; SSE41-NEXT: mulss %xmm3, %xmm0 124 ; SSE41-NEXT: movaps %xmm1, %xmm3 125 ; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] 126 ; SSE41-NEXT: mulss %xmm3, %xmm0 127 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 128 ; SSE41-NEXT: mulss %xmm1, %xmm0 129 ; SSE41-NEXT: mulss %xmm2, %xmm0 130 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 131 ; SSE41-NEXT: mulss %xmm1, %xmm0 132 ; SSE41-NEXT: movaps %xmm2, %xmm1 133 ; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] 134 ; SSE41-NEXT: mulss %xmm1, %xmm0 135 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] 136 ; SSE41-NEXT: mulss %xmm2, %xmm0 137 ; SSE41-NEXT: retq 138 ; 139 ; AVX-LABEL: test_v8f32: 140 ; AVX: # %bb.0: 141 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 142 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 143 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 144 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 145 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 146 ; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3] 147 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 148 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1 149 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 150 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 151 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 152 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 153 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 154 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 155 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 156 ; AVX-NEXT: vzeroupper 157 ; AVX-NEXT: retq 158 ; 159 ; AVX512-LABEL: test_v8f32: 160 ; AVX512: # %bb.0: 161 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 162 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 163 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 164 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 165 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 166 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3] 167 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 168 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm1 169 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 170 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 171 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 172 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 173 ; 
AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 174 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 175 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 176 ; AVX512-NEXT: vzeroupper 177 ; AVX512-NEXT: retq 178 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float %a0, <8 x float> %a1) 179 ret float %1 180 } 181 182 define float @test_v16f32(float %a0, <16 x float> %a1) { 183 ; SSE2-LABEL: test_v16f32: 184 ; SSE2: # %bb.0: 185 ; SSE2-NEXT: mulss %xmm1, %xmm0 186 ; SSE2-NEXT: movaps %xmm1, %xmm5 187 ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3] 188 ; SSE2-NEXT: mulss %xmm5, %xmm0 189 ; SSE2-NEXT: movaps %xmm1, %xmm5 190 ; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] 191 ; SSE2-NEXT: mulss %xmm5, %xmm0 192 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 193 ; SSE2-NEXT: mulss %xmm1, %xmm0 194 ; SSE2-NEXT: mulss %xmm2, %xmm0 195 ; SSE2-NEXT: movaps %xmm2, %xmm1 196 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] 197 ; SSE2-NEXT: mulss %xmm1, %xmm0 198 ; SSE2-NEXT: movaps %xmm2, %xmm1 199 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] 200 ; SSE2-NEXT: mulss %xmm1, %xmm0 201 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] 202 ; SSE2-NEXT: mulss %xmm2, %xmm0 203 ; SSE2-NEXT: mulss %xmm3, %xmm0 204 ; SSE2-NEXT: movaps %xmm3, %xmm1 205 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] 206 ; SSE2-NEXT: mulss %xmm1, %xmm0 207 ; SSE2-NEXT: movaps %xmm3, %xmm1 208 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] 209 ; SSE2-NEXT: mulss %xmm1, %xmm0 210 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] 211 ; SSE2-NEXT: mulss %xmm3, %xmm0 212 ; SSE2-NEXT: mulss %xmm4, %xmm0 213 ; SSE2-NEXT: movaps %xmm4, %xmm1 214 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3] 215 ; SSE2-NEXT: mulss %xmm1, %xmm0 216 ; SSE2-NEXT: movaps %xmm4, %xmm1 217 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] 218 ; SSE2-NEXT: mulss %xmm1, %xmm0 219 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] 220 ; SSE2-NEXT: mulss %xmm4, %xmm0 221 ; SSE2-NEXT: retq 222 ; 223 ; SSE41-LABEL: test_v16f32: 224 ; SSE41: # %bb.0: 225 ; SSE41-NEXT: mulss %xmm1, %xmm0 226 ; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3] 227 ; SSE41-NEXT: mulss %xmm5, %xmm0 228 ; SSE41-NEXT: movaps %xmm1, %xmm5 229 ; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] 230 ; SSE41-NEXT: mulss %xmm5, %xmm0 231 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 232 ; SSE41-NEXT: mulss %xmm1, %xmm0 233 ; SSE41-NEXT: mulss %xmm2, %xmm0 234 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 235 ; SSE41-NEXT: mulss %xmm1, %xmm0 236 ; SSE41-NEXT: movaps %xmm2, %xmm1 237 ; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] 238 ; SSE41-NEXT: mulss %xmm1, %xmm0 239 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] 240 ; SSE41-NEXT: mulss %xmm2, %xmm0 241 ; SSE41-NEXT: mulss %xmm3, %xmm0 242 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] 243 ; SSE41-NEXT: mulss %xmm1, %xmm0 244 ; SSE41-NEXT: movaps %xmm3, %xmm1 245 ; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] 246 ; SSE41-NEXT: mulss %xmm1, %xmm0 247 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] 248 ; SSE41-NEXT: mulss %xmm3, %xmm0 249 ; SSE41-NEXT: mulss %xmm4, %xmm0 250 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3] 251 ; SSE41-NEXT: mulss %xmm1, %xmm0 252 ; SSE41-NEXT: movaps %xmm4, %xmm1 253 ; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] 254 ; SSE41-NEXT: mulss %xmm1, %xmm0 255 ; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] 256 ; SSE41-NEXT: mulss %xmm4, %xmm0 257 ; SSE41-NEXT: retq 258 ; 259 ; AVX-LABEL: 
test_v16f32: 260 ; AVX: # %bb.0: 261 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 262 ; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] 263 ; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0 264 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] 265 ; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0 266 ; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[3,1,2,3] 267 ; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0 268 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1 269 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 270 ; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] 271 ; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0 272 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] 273 ; AVX-NEXT: vmulss %xmm3, %xmm0, %xmm0 274 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 275 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 276 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 277 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 278 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 279 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] 280 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 281 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm2[3,1,2,3] 282 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 283 ; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1 284 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 285 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 286 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 287 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 288 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 289 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 290 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 291 ; AVX-NEXT: vzeroupper 292 ; AVX-NEXT: retq 293 ; 294 ; AVX512-LABEL: test_v16f32: 295 ; AVX512: # %bb.0: 296 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 297 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 298 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 299 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 300 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 301 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3] 302 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 303 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2 304 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 305 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] 306 ; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0 307 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] 308 ; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0 309 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 310 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 311 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2 312 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 313 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] 314 ; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0 315 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] 316 ; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0 317 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 318 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 319 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1 320 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 321 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 322 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 323 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 324 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0 325 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 326 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 327 ; AVX512-NEXT: vzeroupper 328 ; AVX512-NEXT: retq 329 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float %a0, <16 x float> %a1) 330 ret float %1 331 } 332 333 ; 334 ; vXf32 (one) 335 ; 336 337 define float @test_v2f32_one(<2 x float> %a0) { 338 ; SSE2-LABEL: test_v2f32_one: 339 ; SSE2: # %bb.0: 340 ; SSE2-NEXT: movaps %xmm0, %xmm1 341 ; SSE2-NEXT: shufps {{.*#+}} xmm1 
= xmm1[1,1],xmm0[2,3] 342 ; SSE2-NEXT: mulss %xmm0, %xmm1 343 ; SSE2-NEXT: movaps %xmm1, %xmm0 344 ; SSE2-NEXT: retq 345 ; 346 ; SSE41-LABEL: test_v2f32_one: 347 ; SSE41: # %bb.0: 348 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 349 ; SSE41-NEXT: mulss %xmm1, %xmm0 350 ; SSE41-NEXT: retq 351 ; 352 ; AVX-LABEL: test_v2f32_one: 353 ; AVX: # %bb.0: 354 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 355 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 356 ; AVX-NEXT: retq 357 ; 358 ; AVX512-LABEL: test_v2f32_one: 359 ; AVX512: # %bb.0: 360 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 361 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 362 ; AVX512-NEXT: retq 363 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float 1.0, <2 x float> %a0) 364 ret float %1 365 } 366 367 define float @test_v4f32_one(<4 x float> %a0) { 368 ; SSE2-LABEL: test_v4f32_one: 369 ; SSE2: # %bb.0: 370 ; SSE2-NEXT: movaps %xmm0, %xmm1 371 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] 372 ; SSE2-NEXT: mulss %xmm0, %xmm1 373 ; SSE2-NEXT: movaps %xmm0, %xmm2 374 ; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] 375 ; SSE2-NEXT: mulss %xmm1, %xmm2 376 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 377 ; SSE2-NEXT: mulss %xmm2, %xmm0 378 ; SSE2-NEXT: retq 379 ; 380 ; SSE41-LABEL: test_v4f32_one: 381 ; SSE41: # %bb.0: 382 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 383 ; SSE41-NEXT: mulss %xmm0, %xmm1 384 ; SSE41-NEXT: movaps %xmm0, %xmm2 385 ; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] 386 ; SSE41-NEXT: mulss %xmm1, %xmm2 387 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 388 ; SSE41-NEXT: mulss %xmm2, %xmm0 389 ; SSE41-NEXT: retq 390 ; 391 ; AVX-LABEL: test_v4f32_one: 392 ; AVX: # %bb.0: 393 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 394 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1 395 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 396 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 397 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 398 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 399 ; AVX-NEXT: retq 400 ; 401 ; AVX512-LABEL: test_v4f32_one: 402 ; AVX512: # %bb.0: 403 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 404 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1 405 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 406 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 407 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 408 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 409 ; AVX512-NEXT: retq 410 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float 1.0, <4 x float> %a0) 411 ret float %1 412 } 413 414 define float @test_v8f32_one(<8 x float> %a0) { 415 ; SSE2-LABEL: test_v8f32_one: 416 ; SSE2: # %bb.0: 417 ; SSE2-NEXT: movaps %xmm0, %xmm2 418 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] 419 ; SSE2-NEXT: mulss %xmm0, %xmm2 420 ; SSE2-NEXT: movaps %xmm0, %xmm3 421 ; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] 422 ; SSE2-NEXT: mulss %xmm2, %xmm3 423 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 424 ; SSE2-NEXT: mulss %xmm3, %xmm0 425 ; SSE2-NEXT: mulss %xmm1, %xmm0 426 ; SSE2-NEXT: movaps %xmm1, %xmm2 427 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] 428 ; SSE2-NEXT: mulss %xmm2, %xmm0 429 ; SSE2-NEXT: movaps %xmm1, %xmm2 430 ; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] 431 ; SSE2-NEXT: mulss %xmm2, %xmm0 432 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 433 ; SSE2-NEXT: mulss %xmm1, %xmm0 434 ; SSE2-NEXT: retq 435 ; 436 ; SSE41-LABEL: test_v8f32_one: 437 ; SSE41: # %bb.0: 438 ; SSE41-NEXT: movshdup {{.*#+}} 
xmm2 = xmm0[1,1,3,3] 439 ; SSE41-NEXT: mulss %xmm0, %xmm2 440 ; SSE41-NEXT: movaps %xmm0, %xmm3 441 ; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] 442 ; SSE41-NEXT: mulss %xmm2, %xmm3 443 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 444 ; SSE41-NEXT: mulss %xmm3, %xmm0 445 ; SSE41-NEXT: mulss %xmm1, %xmm0 446 ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 447 ; SSE41-NEXT: mulss %xmm2, %xmm0 448 ; SSE41-NEXT: movaps %xmm1, %xmm2 449 ; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] 450 ; SSE41-NEXT: mulss %xmm2, %xmm0 451 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 452 ; SSE41-NEXT: mulss %xmm1, %xmm0 453 ; SSE41-NEXT: retq 454 ; 455 ; AVX-LABEL: test_v8f32_one: 456 ; AVX: # %bb.0: 457 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 458 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1 459 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 460 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 461 ; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] 462 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 463 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 464 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm1 465 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 466 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 467 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 468 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 469 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 470 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 471 ; AVX-NEXT: vzeroupper 472 ; AVX-NEXT: retq 473 ; 474 ; AVX512-LABEL: test_v8f32_one: 475 ; AVX512: # %bb.0: 476 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 477 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1 478 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 479 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 480 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] 481 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 482 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 483 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1 484 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 485 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 486 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 487 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 488 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 489 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 490 ; AVX512-NEXT: vzeroupper 491 ; AVX512-NEXT: retq 492 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float 1.0, <8 x float> %a0) 493 ret float %1 494 } 495 496 define float @test_v16f32_one(<16 x float> %a0) { 497 ; SSE2-LABEL: test_v16f32_one: 498 ; SSE2: # %bb.0: 499 ; SSE2-NEXT: movaps %xmm0, %xmm4 500 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3] 501 ; SSE2-NEXT: mulss %xmm0, %xmm4 502 ; SSE2-NEXT: movaps %xmm0, %xmm5 503 ; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] 504 ; SSE2-NEXT: mulss %xmm4, %xmm5 505 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 506 ; SSE2-NEXT: mulss %xmm5, %xmm0 507 ; SSE2-NEXT: mulss %xmm1, %xmm0 508 ; SSE2-NEXT: movaps %xmm1, %xmm4 509 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] 510 ; SSE2-NEXT: mulss %xmm4, %xmm0 511 ; SSE2-NEXT: movaps %xmm1, %xmm4 512 ; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] 513 ; SSE2-NEXT: mulss %xmm4, %xmm0 514 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 515 ; SSE2-NEXT: mulss %xmm1, %xmm0 516 ; SSE2-NEXT: mulss %xmm2, %xmm0 517 ; SSE2-NEXT: movaps %xmm2, %xmm1 518 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] 519 ; SSE2-NEXT: mulss %xmm1, %xmm0 520 ; SSE2-NEXT: movaps %xmm2, %xmm1 521 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] 522 ; SSE2-NEXT: mulss %xmm1, 
%xmm0 523 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] 524 ; SSE2-NEXT: mulss %xmm2, %xmm0 525 ; SSE2-NEXT: mulss %xmm3, %xmm0 526 ; SSE2-NEXT: movaps %xmm3, %xmm1 527 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] 528 ; SSE2-NEXT: mulss %xmm1, %xmm0 529 ; SSE2-NEXT: movaps %xmm3, %xmm1 530 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] 531 ; SSE2-NEXT: mulss %xmm1, %xmm0 532 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] 533 ; SSE2-NEXT: mulss %xmm3, %xmm0 534 ; SSE2-NEXT: retq 535 ; 536 ; SSE41-LABEL: test_v16f32_one: 537 ; SSE41: # %bb.0: 538 ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] 539 ; SSE41-NEXT: mulss %xmm0, %xmm4 540 ; SSE41-NEXT: movaps %xmm0, %xmm5 541 ; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] 542 ; SSE41-NEXT: mulss %xmm4, %xmm5 543 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 544 ; SSE41-NEXT: mulss %xmm5, %xmm0 545 ; SSE41-NEXT: mulss %xmm1, %xmm0 546 ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] 547 ; SSE41-NEXT: mulss %xmm4, %xmm0 548 ; SSE41-NEXT: movaps %xmm1, %xmm4 549 ; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] 550 ; SSE41-NEXT: mulss %xmm4, %xmm0 551 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 552 ; SSE41-NEXT: mulss %xmm1, %xmm0 553 ; SSE41-NEXT: mulss %xmm2, %xmm0 554 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 555 ; SSE41-NEXT: mulss %xmm1, %xmm0 556 ; SSE41-NEXT: movaps %xmm2, %xmm1 557 ; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] 558 ; SSE41-NEXT: mulss %xmm1, %xmm0 559 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] 560 ; SSE41-NEXT: mulss %xmm2, %xmm0 561 ; SSE41-NEXT: mulss %xmm3, %xmm0 562 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] 563 ; SSE41-NEXT: mulss %xmm1, %xmm0 564 ; SSE41-NEXT: movaps %xmm3, %xmm1 565 ; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] 566 ; SSE41-NEXT: mulss %xmm1, %xmm0 567 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] 568 ; SSE41-NEXT: mulss %xmm3, %xmm0 569 ; SSE41-NEXT: retq 570 ; 571 ; AVX-LABEL: test_v16f32_one: 572 ; AVX: # %bb.0: 573 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 574 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm2 575 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 576 ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 577 ; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3] 578 ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 579 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 580 ; AVX-NEXT: vmulss %xmm0, %xmm2, %xmm2 581 ; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] 582 ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 583 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 584 ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 585 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 586 ; AVX-NEXT: vmulss %xmm0, %xmm2, %xmm0 587 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 588 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 589 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 590 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 591 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 592 ; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3] 593 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 594 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1 595 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 596 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 597 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 598 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 599 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 600 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 601 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 602 ; AVX-NEXT: vzeroupper 603 ; AVX-NEXT: retq 604 ; 605 ; AVX512-LABEL: test_v16f32_one: 606 ; AVX512: # %bb.0: 607 ; 
AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 608 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1 609 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 610 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 611 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] 612 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 613 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2 614 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 615 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] 616 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1 617 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] 618 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1 619 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 620 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 621 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2 622 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 623 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] 624 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1 625 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] 626 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1 627 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 628 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 629 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 630 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1 631 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 632 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 633 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 634 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 635 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 636 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 637 ; AVX512-NEXT: vzeroupper 638 ; AVX512-NEXT: retq 639 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float 1.0, <16 x float> %a0) 640 ret float %1 641 } 642 643 ; 644 ; vXf32 (undef) 645 ; 646 647 define float @test_v2f32_undef(<2 x float> %a0) { 648 ; SSE2-LABEL: test_v2f32_undef: 649 ; SSE2: # %bb.0: 650 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] 651 ; SSE2-NEXT: mulss {{.*}}(%rip), %xmm0 652 ; SSE2-NEXT: retq 653 ; 654 ; SSE41-LABEL: test_v2f32_undef: 655 ; SSE41: # %bb.0: 656 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 657 ; SSE41-NEXT: mulss {{.*}}(%rip), %xmm0 658 ; SSE41-NEXT: retq 659 ; 660 ; AVX-LABEL: test_v2f32_undef: 661 ; AVX: # %bb.0: 662 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 663 ; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 664 ; AVX-NEXT: retq 665 ; 666 ; AVX512-LABEL: test_v2f32_undef: 667 ; AVX512: # %bb.0: 668 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 669 ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 670 ; AVX512-NEXT: retq 671 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float undef, <2 x float> %a0) 672 ret float %1 673 } 674 675 define float @test_v4f32_undef(<4 x float> %a0) { 676 ; SSE2-LABEL: test_v4f32_undef: 677 ; SSE2: # %bb.0: 678 ; SSE2-NEXT: movaps %xmm0, %xmm1 679 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] 680 ; SSE2-NEXT: mulss {{.*}}(%rip), %xmm1 681 ; SSE2-NEXT: movaps %xmm0, %xmm2 682 ; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] 683 ; SSE2-NEXT: mulss %xmm1, %xmm2 684 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 685 ; SSE2-NEXT: mulss %xmm2, %xmm0 686 ; SSE2-NEXT: retq 687 ; 688 ; SSE41-LABEL: test_v4f32_undef: 689 ; SSE41: # %bb.0: 690 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 691 ; SSE41-NEXT: mulss {{.*}}(%rip), %xmm1 692 ; SSE41-NEXT: movaps %xmm0, %xmm2 693 ; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] 694 ; SSE41-NEXT: mulss %xmm1, %xmm2 695 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 696 ; SSE41-NEXT: mulss 
%xmm2, %xmm0 697 ; SSE41-NEXT: retq 698 ; 699 ; AVX-LABEL: test_v4f32_undef: 700 ; AVX: # %bb.0: 701 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 702 ; AVX-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 703 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 704 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 705 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 706 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 707 ; AVX-NEXT: retq 708 ; 709 ; AVX512-LABEL: test_v4f32_undef: 710 ; AVX512: # %bb.0: 711 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 712 ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 713 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 714 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 715 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 716 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 717 ; AVX512-NEXT: retq 718 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float undef, <4 x float> %a0) 719 ret float %1 720 } 721 722 define float @test_v8f32_undef(<8 x float> %a0) { 723 ; SSE2-LABEL: test_v8f32_undef: 724 ; SSE2: # %bb.0: 725 ; SSE2-NEXT: movaps %xmm0, %xmm2 726 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] 727 ; SSE2-NEXT: mulss {{.*}}(%rip), %xmm2 728 ; SSE2-NEXT: movaps %xmm0, %xmm3 729 ; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] 730 ; SSE2-NEXT: mulss %xmm2, %xmm3 731 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 732 ; SSE2-NEXT: mulss %xmm3, %xmm0 733 ; SSE2-NEXT: mulss %xmm1, %xmm0 734 ; SSE2-NEXT: movaps %xmm1, %xmm2 735 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] 736 ; SSE2-NEXT: mulss %xmm2, %xmm0 737 ; SSE2-NEXT: movaps %xmm1, %xmm2 738 ; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] 739 ; SSE2-NEXT: mulss %xmm2, %xmm0 740 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 741 ; SSE2-NEXT: mulss %xmm1, %xmm0 742 ; SSE2-NEXT: retq 743 ; 744 ; SSE41-LABEL: test_v8f32_undef: 745 ; SSE41: # %bb.0: 746 ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 747 ; SSE41-NEXT: mulss {{.*}}(%rip), %xmm2 748 ; SSE41-NEXT: movaps %xmm0, %xmm3 749 ; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] 750 ; SSE41-NEXT: mulss %xmm2, %xmm3 751 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 752 ; SSE41-NEXT: mulss %xmm3, %xmm0 753 ; SSE41-NEXT: mulss %xmm1, %xmm0 754 ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 755 ; SSE41-NEXT: mulss %xmm2, %xmm0 756 ; SSE41-NEXT: movaps %xmm1, %xmm2 757 ; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] 758 ; SSE41-NEXT: mulss %xmm2, %xmm0 759 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 760 ; SSE41-NEXT: mulss %xmm1, %xmm0 761 ; SSE41-NEXT: retq 762 ; 763 ; AVX-LABEL: test_v8f32_undef: 764 ; AVX: # %bb.0: 765 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 766 ; AVX-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 767 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 768 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 769 ; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] 770 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 771 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 772 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm1 773 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 774 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 775 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 776 ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 777 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 778 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 779 ; AVX-NEXT: vzeroupper 780 ; AVX-NEXT: retq 781 ; 782 ; AVX512-LABEL: test_v8f32_undef: 783 ; AVX512: # %bb.0: 784 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 785 ; AVX512-NEXT: vmulss 
{{.*}}(%rip), %xmm1, %xmm1 786 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 787 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 788 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] 789 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 790 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 791 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1 792 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 793 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 794 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 795 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 796 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 797 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 798 ; AVX512-NEXT: vzeroupper 799 ; AVX512-NEXT: retq 800 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float undef, <8 x float> %a0) 801 ret float %1 802 } 803 804 define float @test_v16f32_undef(<16 x float> %a0) { 805 ; SSE2-LABEL: test_v16f32_undef: 806 ; SSE2: # %bb.0: 807 ; SSE2-NEXT: movaps %xmm0, %xmm4 808 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3] 809 ; SSE2-NEXT: mulss {{.*}}(%rip), %xmm4 810 ; SSE2-NEXT: movaps %xmm0, %xmm5 811 ; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] 812 ; SSE2-NEXT: mulss %xmm4, %xmm5 813 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 814 ; SSE2-NEXT: mulss %xmm5, %xmm0 815 ; SSE2-NEXT: mulss %xmm1, %xmm0 816 ; SSE2-NEXT: movaps %xmm1, %xmm4 817 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] 818 ; SSE2-NEXT: mulss %xmm4, %xmm0 819 ; SSE2-NEXT: movaps %xmm1, %xmm4 820 ; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] 821 ; SSE2-NEXT: mulss %xmm4, %xmm0 822 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 823 ; SSE2-NEXT: mulss %xmm1, %xmm0 824 ; SSE2-NEXT: mulss %xmm2, %xmm0 825 ; SSE2-NEXT: movaps %xmm2, %xmm1 826 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] 827 ; SSE2-NEXT: mulss %xmm1, %xmm0 828 ; SSE2-NEXT: movaps %xmm2, %xmm1 829 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] 830 ; SSE2-NEXT: mulss %xmm1, %xmm0 831 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] 832 ; SSE2-NEXT: mulss %xmm2, %xmm0 833 ; SSE2-NEXT: mulss %xmm3, %xmm0 834 ; SSE2-NEXT: movaps %xmm3, %xmm1 835 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] 836 ; SSE2-NEXT: mulss %xmm1, %xmm0 837 ; SSE2-NEXT: movaps %xmm3, %xmm1 838 ; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] 839 ; SSE2-NEXT: mulss %xmm1, %xmm0 840 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] 841 ; SSE2-NEXT: mulss %xmm3, %xmm0 842 ; SSE2-NEXT: retq 843 ; 844 ; SSE41-LABEL: test_v16f32_undef: 845 ; SSE41: # %bb.0: 846 ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] 847 ; SSE41-NEXT: mulss {{.*}}(%rip), %xmm4 848 ; SSE41-NEXT: movaps %xmm0, %xmm5 849 ; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] 850 ; SSE41-NEXT: mulss %xmm4, %xmm5 851 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 852 ; SSE41-NEXT: mulss %xmm5, %xmm0 853 ; SSE41-NEXT: mulss %xmm1, %xmm0 854 ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] 855 ; SSE41-NEXT: mulss %xmm4, %xmm0 856 ; SSE41-NEXT: movaps %xmm1, %xmm4 857 ; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] 858 ; SSE41-NEXT: mulss %xmm4, %xmm0 859 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] 860 ; SSE41-NEXT: mulss %xmm1, %xmm0 861 ; SSE41-NEXT: mulss %xmm2, %xmm0 862 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 863 ; SSE41-NEXT: mulss %xmm1, %xmm0 864 ; SSE41-NEXT: movaps %xmm2, %xmm1 865 ; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] 866 ; SSE41-NEXT: mulss %xmm1, %xmm0 867 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] 
868 ; SSE41-NEXT: mulss %xmm2, %xmm0 869 ; SSE41-NEXT: mulss %xmm3, %xmm0 870 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] 871 ; SSE41-NEXT: mulss %xmm1, %xmm0 872 ; SSE41-NEXT: movaps %xmm3, %xmm1 873 ; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] 874 ; SSE41-NEXT: mulss %xmm1, %xmm0 875 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] 876 ; SSE41-NEXT: mulss %xmm3, %xmm0 877 ; SSE41-NEXT: retq 878 ; 879 ; AVX-LABEL: test_v16f32_undef: 880 ; AVX: # %bb.0: 881 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 882 ; AVX-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2 883 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 884 ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 885 ; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3] 886 ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 887 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 888 ; AVX-NEXT: vmulss %xmm0, %xmm2, %xmm2 889 ; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] 890 ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 891 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 892 ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 893 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 894 ; AVX-NEXT: vmulss %xmm0, %xmm2, %xmm0 895 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 896 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 897 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 898 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 899 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 900 ; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3] 901 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 902 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1 903 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 904 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 905 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 906 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 907 ; AVX-NEXT: vmulss %xmm2, %xmm0, %xmm0 908 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 909 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 910 ; AVX-NEXT: vzeroupper 911 ; AVX-NEXT: retq 912 ; 913 ; AVX512-LABEL: test_v16f32_undef: 914 ; AVX512: # %bb.0: 915 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 916 ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 917 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 918 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 919 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] 920 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 921 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2 922 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 923 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] 924 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1 925 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] 926 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1 927 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 928 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 929 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2 930 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 931 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] 932 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1 933 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] 934 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1 935 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 936 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 937 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 938 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1 939 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 940 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 941 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 942 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 943 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 944 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 945 ; AVX512-NEXT: 
vzeroupper 946 ; AVX512-NEXT: retq 947 %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float undef, <16 x float> %a0) 948 ret float %1 949 } 950 951 ; 952 ; vXf64 (accum) 953 ; 954 955 define double @test_v2f64(double %a0, <2 x double> %a1) { 956 ; SSE-LABEL: test_v2f64: 957 ; SSE: # %bb.0: 958 ; SSE-NEXT: mulsd %xmm1, %xmm0 959 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] 960 ; SSE-NEXT: mulsd %xmm1, %xmm0 961 ; SSE-NEXT: retq 962 ; 963 ; AVX-LABEL: test_v2f64: 964 ; AVX: # %bb.0: 965 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 966 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 967 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 968 ; AVX-NEXT: retq 969 ; 970 ; AVX512-LABEL: test_v2f64: 971 ; AVX512: # %bb.0: 972 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 973 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 974 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 975 ; AVX512-NEXT: retq 976 %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double %a0, <2 x double> %a1) 977 ret double %1 978 } 979 980 define double @test_v4f64(double %a0, <4 x double> %a1) { 981 ; SSE-LABEL: test_v4f64: 982 ; SSE: # %bb.0: 983 ; SSE-NEXT: mulsd %xmm1, %xmm0 984 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] 985 ; SSE-NEXT: mulsd %xmm1, %xmm0 986 ; SSE-NEXT: mulsd %xmm2, %xmm0 987 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] 988 ; SSE-NEXT: mulsd %xmm2, %xmm0 989 ; SSE-NEXT: retq 990 ; 991 ; AVX-LABEL: test_v4f64: 992 ; AVX: # %bb.0: 993 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 994 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 995 ; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0 996 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1 997 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 998 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 999 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1000 ; AVX-NEXT: vzeroupper 1001 ; AVX-NEXT: retq 1002 ; 1003 ; AVX512-LABEL: test_v4f64: 1004 ; AVX512: # %bb.0: 1005 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1006 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 1007 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1008 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm1 1009 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1010 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1011 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1012 ; AVX512-NEXT: vzeroupper 1013 ; AVX512-NEXT: retq 1014 %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double %a0, <4 x double> %a1) 1015 ret double %1 1016 } 1017 1018 define double @test_v8f64(double %a0, <8 x double> %a1) { 1019 ; SSE-LABEL: test_v8f64: 1020 ; SSE: # %bb.0: 1021 ; SSE-NEXT: mulsd %xmm1, %xmm0 1022 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] 1023 ; SSE-NEXT: mulsd %xmm1, %xmm0 1024 ; SSE-NEXT: mulsd %xmm2, %xmm0 1025 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] 1026 ; SSE-NEXT: mulsd %xmm2, %xmm0 1027 ; SSE-NEXT: mulsd %xmm3, %xmm0 1028 ; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] 1029 ; SSE-NEXT: mulsd %xmm3, %xmm0 1030 ; SSE-NEXT: mulsd %xmm4, %xmm0 1031 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] 1032 ; SSE-NEXT: mulsd %xmm4, %xmm0 1033 ; SSE-NEXT: retq 1034 ; 1035 ; AVX-LABEL: test_v8f64: 1036 ; AVX: # %bb.0: 1037 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1038 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] 1039 ; AVX-NEXT: vmulsd %xmm3, %xmm0, %xmm0 1040 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1 1041 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1042 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1043 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1044 ; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1045 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] 1046 ; 
AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1047 ; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1 1048 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1049 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1050 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1051 ; AVX-NEXT: vzeroupper 1052 ; AVX-NEXT: retq 1053 ; 1054 ; AVX512-LABEL: test_v8f64: 1055 ; AVX512: # %bb.0: 1056 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1057 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 1058 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1059 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2 1060 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1061 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] 1062 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1063 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2 1064 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1065 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] 1066 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1067 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1 1068 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1069 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1070 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1071 ; AVX512-NEXT: vzeroupper 1072 ; AVX512-NEXT: retq 1073 %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double %a0, <8 x double> %a1) 1074 ret double %1 1075 } 1076 1077 define double @test_v16f64(double %a0, <16 x double> %a1) { 1078 ; SSE-LABEL: test_v16f64: 1079 ; SSE: # %bb.0: 1080 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 1081 ; SSE-NEXT: mulsd %xmm1, %xmm0 1082 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] 1083 ; SSE-NEXT: mulsd %xmm1, %xmm0 1084 ; SSE-NEXT: mulsd %xmm2, %xmm0 1085 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] 1086 ; SSE-NEXT: mulsd %xmm2, %xmm0 1087 ; SSE-NEXT: mulsd %xmm3, %xmm0 1088 ; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] 1089 ; SSE-NEXT: mulsd %xmm3, %xmm0 1090 ; SSE-NEXT: mulsd %xmm4, %xmm0 1091 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] 1092 ; SSE-NEXT: mulsd %xmm4, %xmm0 1093 ; SSE-NEXT: mulsd %xmm5, %xmm0 1094 ; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1] 1095 ; SSE-NEXT: mulsd %xmm5, %xmm0 1096 ; SSE-NEXT: mulsd %xmm6, %xmm0 1097 ; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1] 1098 ; SSE-NEXT: mulsd %xmm6, %xmm0 1099 ; SSE-NEXT: mulsd %xmm7, %xmm0 1100 ; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1] 1101 ; SSE-NEXT: mulsd %xmm7, %xmm0 1102 ; SSE-NEXT: mulsd %xmm8, %xmm0 1103 ; SSE-NEXT: movhlps {{.*#+}} xmm8 = xmm8[1,1] 1104 ; SSE-NEXT: mulsd %xmm8, %xmm0 1105 ; SSE-NEXT: retq 1106 ; 1107 ; AVX-LABEL: test_v16f64: 1108 ; AVX: # %bb.0: 1109 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1110 ; AVX-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] 1111 ; AVX-NEXT: vmulsd %xmm5, %xmm0, %xmm0 1112 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1 1113 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1114 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1115 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1116 ; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1117 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] 1118 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1119 ; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1 1120 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1121 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1122 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1123 ; AVX-NEXT: vmulsd %xmm3, %xmm0, %xmm0 1124 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm3[1,0] 1125 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1126 ; AVX-NEXT: vextractf128 $1, %ymm3, %xmm1 1127 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1128 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1129 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1130 ; AVX-NEXT: vmulsd %xmm4, %xmm0, %xmm0 1131 ; 
AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm4[1,0] 1132 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1133 ; AVX-NEXT: vextractf128 $1, %ymm4, %xmm1 1134 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1135 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1136 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1137 ; AVX-NEXT: vzeroupper 1138 ; AVX-NEXT: retq 1139 ; 1140 ; AVX512-LABEL: test_v16f64: 1141 ; AVX512: # %bb.0: 1142 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1143 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] 1144 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0 1145 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm3 1146 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0 1147 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] 1148 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0 1149 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm3 1150 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0 1151 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] 1152 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0 1153 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1 1154 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1155 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1156 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1157 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1158 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] 1159 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1160 ; AVX512-NEXT: vextractf128 $1, %ymm2, %xmm1 1161 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1162 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1163 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1164 ; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm1 1165 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1166 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1167 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1168 ; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm1 1169 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1170 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1171 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1172 ; AVX512-NEXT: vzeroupper 1173 ; AVX512-NEXT: retq 1174 %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double %a0, <16 x double> %a1) 1175 ret double %1 1176 } 1177 1178 ; 1179 ; vXf64 (one) 1180 ; 1181 1182 define double @test_v2f64_one(<2 x double> %a0) { 1183 ; SSE-LABEL: test_v2f64_one: 1184 ; SSE: # %bb.0: 1185 ; SSE-NEXT: movaps %xmm0, %xmm1 1186 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] 1187 ; SSE-NEXT: mulsd %xmm0, %xmm1 1188 ; SSE-NEXT: movapd %xmm1, %xmm0 1189 ; SSE-NEXT: retq 1190 ; 1191 ; AVX-LABEL: test_v2f64_one: 1192 ; AVX: # %bb.0: 1193 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 1194 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1195 ; AVX-NEXT: retq 1196 ; 1197 ; AVX512-LABEL: test_v2f64_one: 1198 ; AVX512: # %bb.0: 1199 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 1200 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1201 ; AVX512-NEXT: retq 1202 %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double 1.0, <2 x double> %a0) 1203 ret double %1 1204 } 1205 1206 define double @test_v4f64_one(<4 x double> %a0) { 1207 ; SSE-LABEL: test_v4f64_one: 1208 ; SSE: # %bb.0: 1209 ; SSE-NEXT: movaps %xmm0, %xmm2 1210 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] 1211 ; SSE-NEXT: mulsd %xmm0, %xmm2 1212 ; SSE-NEXT: mulsd %xmm1, %xmm2 1213 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] 1214 ; SSE-NEXT: mulsd %xmm1, %xmm2 1215 ; SSE-NEXT: movapd %xmm2, %xmm0 1216 ; SSE-NEXT: retq 1217 ; 1218 ; AVX-LABEL: test_v4f64_one: 1219 ; AVX: # %bb.0: 1220 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 1221 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm1 1222 ; AVX-NEXT: vextractf128 $1, 
%ymm0, %xmm0 1223 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm1 1224 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1225 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0 1226 ; AVX-NEXT: vzeroupper 1227 ; AVX-NEXT: retq 1228 ; 1229 ; AVX512-LABEL: test_v4f64_one: 1230 ; AVX512: # %bb.0: 1231 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 1232 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm1 1233 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 1234 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1 1235 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1236 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0 1237 ; AVX512-NEXT: vzeroupper 1238 ; AVX512-NEXT: retq 1239 %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double 1.0, <4 x double> %a0) 1240 ret double %1 1241 } 1242 1243 define double @test_v8f64_one(<8 x double> %a0) { 1244 ; SSE-LABEL: test_v8f64_one: 1245 ; SSE: # %bb.0: 1246 ; SSE-NEXT: movaps %xmm0, %xmm4 1247 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] 1248 ; SSE-NEXT: mulsd %xmm0, %xmm4 1249 ; SSE-NEXT: mulsd %xmm1, %xmm4 1250 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] 1251 ; SSE-NEXT: mulsd %xmm1, %xmm4 1252 ; SSE-NEXT: mulsd %xmm2, %xmm4 1253 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] 1254 ; SSE-NEXT: mulsd %xmm2, %xmm4 1255 ; SSE-NEXT: mulsd %xmm3, %xmm4 1256 ; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] 1257 ; SSE-NEXT: mulsd %xmm3, %xmm4 1258 ; SSE-NEXT: movapd %xmm4, %xmm0 1259 ; SSE-NEXT: retq 1260 ; 1261 ; AVX-LABEL: test_v8f64_one: 1262 ; AVX: # %bb.0: 1263 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1264 ; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm2 1265 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 1266 ; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm2 1267 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1268 ; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm0 1269 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1270 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 1271 ; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1272 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1 1273 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1274 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1275 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1276 ; AVX-NEXT: vzeroupper 1277 ; AVX-NEXT: retq 1278 ; 1279 ; AVX512-LABEL: test_v8f64_one: 1280 ; AVX512: # %bb.0: 1281 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 1282 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm1 1283 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2 1284 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1 1285 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] 1286 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1 1287 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2 1288 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1 1289 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] 1290 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1 1291 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 1292 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1 1293 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1294 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0 1295 ; AVX512-NEXT: vzeroupper 1296 ; AVX512-NEXT: retq 1297 %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double 1.0, <8 x double> %a0) 1298 ret double %1 1299 } 1300 1301 define double @test_v16f64_one(<16 x double> %a0) { 1302 ; SSE-LABEL: test_v16f64_one: 1303 ; SSE: # %bb.0: 1304 ; SSE-NEXT: movaps %xmm0, %xmm8 1305 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 1306 ; SSE-NEXT: mulsd %xmm8, %xmm0 1307 ; SSE-NEXT: mulsd %xmm1, %xmm0 1308 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] 1309 ; SSE-NEXT: mulsd %xmm1, %xmm0 1310 ; SSE-NEXT: mulsd %xmm2, %xmm0 1311 ; SSE-NEXT: movhlps {{.*#+}} 
xmm2 = xmm2[1,1] 1312 ; SSE-NEXT: mulsd %xmm2, %xmm0 1313 ; SSE-NEXT: mulsd %xmm3, %xmm0 1314 ; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] 1315 ; SSE-NEXT: mulsd %xmm3, %xmm0 1316 ; SSE-NEXT: mulsd %xmm4, %xmm0 1317 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] 1318 ; SSE-NEXT: mulsd %xmm4, %xmm0 1319 ; SSE-NEXT: mulsd %xmm5, %xmm0 1320 ; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1] 1321 ; SSE-NEXT: mulsd %xmm5, %xmm0 1322 ; SSE-NEXT: mulsd %xmm6, %xmm0 1323 ; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1] 1324 ; SSE-NEXT: mulsd %xmm6, %xmm0 1325 ; SSE-NEXT: mulsd %xmm7, %xmm0 1326 ; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1] 1327 ; SSE-NEXT: mulsd %xmm7, %xmm0 1328 ; SSE-NEXT: retq 1329 ; 1330 ; AVX-LABEL: test_v16f64_one: 1331 ; AVX: # %bb.0: 1332 ; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] 1333 ; AVX-NEXT: vmulsd %xmm4, %xmm0, %xmm4 1334 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 1335 ; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm4 1336 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1337 ; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm0 1338 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1339 ; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0] 1340 ; AVX-NEXT: vmulsd %xmm4, %xmm0, %xmm0 1341 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1 1342 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1343 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1344 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1345 ; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1346 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] 1347 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1348 ; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1 1349 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1350 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1351 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1352 ; AVX-NEXT: vmulsd %xmm3, %xmm0, %xmm0 1353 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm3[1,0] 1354 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1355 ; AVX-NEXT: vextractf128 $1, %ymm3, %xmm1 1356 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1357 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 1358 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1359 ; AVX-NEXT: vzeroupper 1360 ; AVX-NEXT: retq 1361 ; 1362 ; AVX512-LABEL: test_v16f64_one: 1363 ; AVX512: # %bb.0: 1364 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1365 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm2 1366 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3 1367 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2 1368 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] 1369 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2 1370 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3 1371 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2 1372 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] 1373 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2 1374 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 1375 ; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm2 1376 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1377 ; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm0 1378 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1379 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 1380 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1381 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2 1382 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1383 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] 1384 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1385 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2 1386 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1387 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] 1388 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0 1389 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1 1390 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 1391 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double 1.0, <16 x double> %a0)
  ret double %1
}

;
; vXf64 (undef)
;

define double @test_v2f64_undef(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_undef:
; SSE: # %bb.0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double undef, <2 x double> %a0)
  ret double %1
}

define double @test_v4f64_undef(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_undef:
; SSE: # %bb.0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f64_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double undef, <4 x double> %a0)
  ret double %1
}

define double @test_v8f64_undef(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_undef:
; SSE: # %bb.0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f64_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double undef, <8 x double> %a0)
  ret double %1
}

define double @test_v16f64_undef(<16 x double> %a0) {
; SSE-LABEL: test_v16f64_undef:
; SSE: # %bb.0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: mulsd %xmm3, %xmm0
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
; SSE-NEXT: mulsd %xmm4, %xmm0
; SSE-NEXT: mulsd %xmm5, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm5[1,1]
; SSE-NEXT: mulsd %xmm5, %xmm0
; SSE-NEXT: mulsd %xmm6, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
; SSE-NEXT: mulsd %xmm6, %xmm0
; SSE-NEXT: mulsd %xmm7, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm7 = xmm7[1,1]
; SSE-NEXT: mulsd %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm4, %xmm4
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm4
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm0
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm4, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm3[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vextractf128 $1, %ymm3, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f64_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm2, %xmm2
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double undef, <16 x double> %a0)
  ret double %1
}

declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float, <2 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float, <8 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float, <16 x float>)

declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double, <2 x double>)
declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double, <4 x double>)
declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double, <8 x double>)
declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double, <16 x double>)