; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK

; Every test below feeds llvm.fma.* (optionally with operands negated through
; "fsub -0.0, x") to llc with -fast-isel and FMA3 enabled / FMA4 disabled, and
; pins the exact instruction sequence that comes out.

;
; 128-bit (xmm) tests
;

; fma(a, b, c) on <4 x float>: a single vfmadd213ps is expected.
define <4 x float> @test_mm_fmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  ret <4 x float> %fma
}

; fma(a, b, c) on <2 x double>: a single vfmadd213pd is expected.
define <2 x double> @test_mm_fmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  ret <2 x double> %fma
}

; Scalar float fma on element 0 of each input; the result is written back into
; element 0 of %a. A single vfmadd213ss is expected.
define <4 x float> @test_mm_fmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmadd_ss:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %a0 = extractelement <4 x float> %a, i64 0
  %b0 = extractelement <4 x float> %b, i64 0
  %c0 = extractelement <4 x float> %c, i64 0
  %fma = tail call float @llvm.fma.f32(float %a0, float %b0, float %c0) #2
  %res = insertelement <4 x float> %a, float %fma, i64 0
  ret <4 x float> %res
}

; Scalar double fma on element 0 of each input; the result is written back into
; element 0 of %a. A single vfmadd213sd is expected.
define <2 x double> @test_mm_fmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmadd_sd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %a0 = extractelement <2 x double> %a, i64 0
  %b0 = extractelement <2 x double> %b, i64 0
  %c0 = extractelement <2 x double> %c, i64 0
  %fma = tail call double @llvm.fma.f64(double %a0, double %b0, double %c0) #2
  %res = insertelement <2 x double> %a, double %fma, i64 0
  ret <2 x double> %res
}

; fma(a, b, -c) on <4 x float>. The expected output keeps the negation as an
; explicit vxorps of xmm2 followed by vfmadd213ps.
define <4 x float> @test_mm_fmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
; CHECK-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %neg.c = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %neg.c) #2
  ret <4 x float> %fma
}

; fma(a, b, -c) on <2 x double>. The expected output keeps the negation as an
; explicit vxorpd of xmm2 followed by vfmadd213pd.
define <2 x double> @test_mm_fmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm2, %xmm2
; CHECK-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %neg.c = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %neg.c) #2
  ret <2 x double> %fma
}

; Scalar float fma(a0, b0, -c0), inserted into element 0 of %a. Here the
; expected output is a single vfmsub213ss.
define <4 x float> @test_mm_fmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmsub_ss:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq
entry:
  %a0 = extractelement <4 x float> %a, i64 0
  %b0 = extractelement <4 x float> %b, i64 0
  %c0 = extractelement <4 x float> %c, i64 0
  %neg.c0 = fsub float -0.000000e+00, %c0
  %fma = tail call float @llvm.fma.f32(float %a0, float %b0, float %neg.c0) #2
  %res = insertelement <4 x float> %a, float %fma, i64 0
  ret <4 x float> %res
}

; Scalar double fma(a0, b0, -c0), inserted into element 0 of %a. The expected
; output is a single vfmsub213sd.
define <2 x double> @test_mm_fmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmsub_sd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq
entry:
  %a0 = extractelement <2 x double> %a, i64 0
  %b0 = extractelement <2 x double> %b, i64 0
  %c0 = extractelement <2 x double> %c, i64 0
  %neg.c0 = fsub double -0.000000e+00, %c0
  %fma = tail call double @llvm.fma.f64(double %a0, double %b0, double %neg.c0) #2
  %res = insertelement <2 x double> %a, double %fma, i64 0
  ret <2 x double> %res
}

; fma(-a, b, c) on <4 x float>. The expected output negates xmm0 with vxorps
; and then issues vfmadd213ps.
define <4 x float> @test_mm_fnmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fnmadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %neg.a = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %neg.a, <4 x float> %b, <4 x float> %c) #2
  ret <4 x float> %fma
}

; fma(-a, b, c) on <2 x double>. The expected output negates xmm0 with vxorpd
; and then issues vfmadd213pd.
define <2 x double> @test_mm_fnmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fnmadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %neg.a = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %neg.a, <2 x double> %b, <2 x double> %c) #2
  ret <2 x double> %fma
}

; Scalar float fma(a0, -b0, c0), inserted into element 0 of %a. The expected
; output is a single vfnmadd213ss.
define <4 x float> @test_mm_fnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fnmadd_ss:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %a0 = extractelement <4 x float> %a, i64 0
  %b0 = extractelement <4 x float> %b, i64 0
  %neg.b0 = fsub float -0.000000e+00, %b0
  %c0 = extractelement <4 x float> %c, i64 0
  %fma = tail call float @llvm.fma.f32(float %a0, float %neg.b0, float %c0) #2
  %res = insertelement <4 x float> %a, float %fma, i64 0
  ret <4 x float> %res
}

; Scalar double fma(a0, -b0, c0), inserted into element 0 of %a. The expected
; output is a single vfnmadd213sd.
define <2 x double> @test_mm_fnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fnmadd_sd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %a0 = extractelement <2 x double> %a, i64 0
  %b0 = extractelement <2 x double> %b, i64 0
  %neg.b0 = fsub double -0.000000e+00, %b0
  %c0 = extractelement <2 x double> %c, i64 0
  %fma = tail call double @llvm.fma.f64(double %a0, double %neg.b0, double %c0) #2
  %res = insertelement <2 x double> %a, double %fma, i64 0
  ret <2 x double> %res
}

; fma(-a, b, -c) on <4 x float>. The expected output loads the sign-bit mask
; once, applies it to xmm0 and xmm2 with two vxorps, then issues vfmadd231ps.
define <4 x float> @test_mm_fnmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fnmsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmovaps {{.*#+}} xmm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; CHECK-NEXT:    vxorps %xmm3, %xmm0, %xmm4
; CHECK-NEXT:    vxorps %xmm3, %xmm2, %xmm0
; CHECK-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
; CHECK-NEXT:    retq
entry:
  %neg.a = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %neg.c = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %neg.a, <4 x float> %b, <4 x float> %neg.c) #2
  ret <4 x float> %fma
}

; fma(-a, b, -c) on <2 x double>. The expected output loads the sign-bit mask
; once, applies it to xmm0 and xmm2 with two vxorpd, then issues vfmadd231pd.
define <2 x double> @test_mm_fnmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fnmsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmovapd {{.*#+}} xmm3 = [-0.000000e+00,-0.000000e+00]
; CHECK-NEXT:    vxorpd %xmm3, %xmm0, %xmm4
; CHECK-NEXT:    vxorpd %xmm3, %xmm2, %xmm0
; CHECK-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
; CHECK-NEXT:    retq
entry:
  %neg.a = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
  %neg.c = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %neg.a, <2 x double> %b, <2 x double> %neg.c) #2
  ret <2 x double> %fma
}

; Scalar float fma(a0, -b0, -c0), inserted into element 0 of %a. The expected
; output is a single vfnmsub213ss.
define <4 x float> @test_mm_fnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fnmsub_ss:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq
entry:
  %a0 = extractelement <4 x float> %a, i64 0
  %b0 = extractelement <4 x float> %b, i64 0
  %neg.b0 = fsub float -0.000000e+00, %b0
  %c0 = extractelement <4 x float> %c, i64 0
  %neg.c0 = fsub float -0.000000e+00, %c0
  %fma = tail call float @llvm.fma.f32(float %a0, float %neg.b0, float %neg.c0) #2
  %res = insertelement <4 x float> %a, float %fma, i64 0
  ret <4 x float> %res
}

; Scalar double fma(a0, -b0, -c0), inserted into element 0 of %a. The expected
; output is a single vfnmsub213sd.
define <2 x double> @test_mm_fnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fnmsub_sd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq
entry:
  %a0 = extractelement <2 x double> %a, i64 0
  %b0 = extractelement <2 x double> %b, i64 0
  %neg.b0 = fsub double -0.000000e+00, %b0
  %c0 = extractelement <2 x double> %c, i64 0
  %neg.c0 = fsub double -0.000000e+00, %c0
  %fma = tail call double @llvm.fma.f64(double %a0, double %neg.b0, double %neg.c0) #2
  %res = insertelement <2 x double> %a, double %fma, i64 0
  ret <2 x double> %res
}

; Computes both fma(a, b, c) and fma(a, b, -c) on <4 x float>, then blends:
; even lanes come from the subtract form, odd lanes from the add form. The
; expected output is a single vfmaddsub213ps.
define <4 x float> @test_mm_fmaddsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmaddsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT:    retq
entry:
  %add = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  %neg.c = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %sub = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %neg.c) #2
  %res = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %res
}

; Computes both fma(a, b, c) and fma(a, b, -c) on <2 x double>, then blends:
; lane 0 comes from the subtract form, lane 1 from the add form. The expected
; output is a single vfmaddsub213pd.
define <2 x double> @test_mm_fmaddsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmaddsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT:    retq
entry:
  %add = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  %neg.c = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %sub = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %neg.c) #2
  %res = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

; Computes both fma(a, b, -c) and fma(a, b, c) on <4 x float>, then blends:
; even lanes come from the add form, odd lanes from the subtract form. The
; expected output is a single vfmsubadd213ps.
define <4 x float> @test_mm_fmsubadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmsubadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2
; CHECK-NEXT:    retq
entry:
  %neg.c = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %sub = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %neg.c) #2
  %add = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  %res = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %res
}

; Computes both fma(a, b, -c) and fma(a, b, c) on <2 x double>, then blends:
; lane 0 comes from the add form, lane 1 from the subtract form. The expected
; output is a single vfmsubadd213pd.
define <2 x double> @test_mm_fmsubadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmsubadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2
; CHECK-NEXT:    retq
entry:
  %neg.c = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %sub = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %neg.c) #2
  %add = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  %res = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

;
; 256-bit (ymm) tests
;

; fma(a, b, c) on <8 x float>: a single vfmadd213ps on ymm registers is expected.
define <8 x float> @test_mm256_fmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fmadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %fma = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  ret <8 x float> %fma
}

; fma(a, b, c) on <4 x double>: a single vfmadd213pd on ymm registers is expected.
define <4 x double> @test_mm256_fmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fmadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %fma = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
  ret <4 x double> %fma
}

; fma(a, b, -c) on <8 x float>. The expected output keeps the negation as an
; explicit vxorps of ymm2 followed by vfmadd213ps.
define <8 x float> @test_mm256_fmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fmsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %neg.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %fma = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %neg.c) #2
  ret <8 x float> %fma
}

; fma(a, b, -c) on <4 x double>. The expected output keeps the negation as an
; explicit vxorpd of ymm2 followed by vfmadd213pd.
define <4 x double> @test_mm256_fmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fmsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorpd {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %neg.c = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %fma = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %neg.c) #2
  ret <4 x double> %fma
}

; fma(-a, b, c) on <8 x float>. The expected output negates ymm0 with vxorps
; and then issues vfmadd213ps.
define <8 x float> @test_mm256_fnmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fnmadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %neg.a = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %fma = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %neg.a, <8 x float> %b, <8 x float> %c) #2
  ret <8 x float> %fma
}

; fma(-a, b, c) on <4 x double>. The expected output negates ymm0 with vxorpd
; and then issues vfmadd213pd.
define <4 x double> @test_mm256_fnmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fnmadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %neg.a = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a
  %fma = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %neg.a, <4 x double> %b, <4 x double> %c) #2
  ret <4 x double> %fma
}

; fma(-a, b, -c) on <8 x float>. The expected output loads the sign-bit mask
; once, applies it to ymm0 and ymm2 with two vxorps, then issues vfmadd231ps.
define <8 x float> @test_mm256_fnmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fnmsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmovaps {{.*#+}} ymm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; CHECK-NEXT:    vxorps %ymm3, %ymm0, %ymm4
; CHECK-NEXT:    vxorps %ymm3, %ymm2, %ymm0
; CHECK-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
; CHECK-NEXT:    retq
entry:
  %neg.a = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %neg.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %fma = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %neg.a, <8 x float> %b, <8 x float> %neg.c) #2
  ret <8 x float> %fma
}

; fma(-a, b, -c) on <4 x double>. The expected output loads the sign-bit mask
; once, applies it to ymm0 and ymm2 with two vxorpd, then issues vfmadd231pd.
define <4 x double> @test_mm256_fnmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fnmsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmovapd {{.*#+}} ymm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; CHECK-NEXT:    vxorpd %ymm3, %ymm0, %ymm4
; CHECK-NEXT:    vxorpd %ymm3, %ymm2, %ymm0
; CHECK-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
; CHECK-NEXT:    retq
entry:
  %neg.a = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a
  %neg.c = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %fma = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %neg.a, <4 x double> %b, <4 x double> %neg.c) #2
  ret <4 x double> %fma
}

; Computes both fma(a, b, c) and fma(a, b, -c) on <8 x float>, then blends:
; even lanes come from the subtract form, odd lanes from the add form. The
; expected output is a single vfmaddsub213ps on ymm registers.
define <8 x float> @test_mm256_fmaddsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fmaddsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT:    retq
entry:
  %add = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  %neg.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %sub = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %neg.c) #2
  %res = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %res
}

; Computes both fma(a, b, c) and fma(a, b, -c) on <4 x double>, then blends:
; even lanes come from the subtract form, odd lanes from the add form. The
; expected output is a single vfmaddsub213pd on ymm registers.
define <4 x double> @test_mm256_fmaddsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fmaddsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT:    retq
entry:
  %add = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
  %neg.c = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %sub = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %neg.c) #2
  %res = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %res
}

; Computes both fma(a, b, -c) and fma(a, b, c) on <8 x float>, then blends:
; even lanes come from the add form, odd lanes from the subtract form. The
; expected output is a single vfmsubadd213ps on ymm registers.
define <8 x float> @test_mm256_fmsubadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fmsubadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2
; CHECK-NEXT:    retq
entry:
  %neg.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %sub = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %neg.c) #2
  %add = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  %res = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %res
}

; Computes both fma(a, b, -c) and fma(a, b, c) on <4 x double>, then blends:
; even lanes come from the add form, odd lanes from the subtract form. The
; expected output is a single vfmsubadd213pd on ymm registers.
define <4 x double> @test_mm256_fmsubadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fmsubadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2
; CHECK-NEXT:    retq
entry:
  %neg.c = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %sub = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %neg.c) #2
  %add = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
  %res = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %res
}

; Declarations of the llvm.fma overloads exercised by the tests in this file:
; 128-bit vectors, scalars, then 256-bit vectors.
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #1
declare float @llvm.fma.f32(float, float, float) #1
declare double @llvm.fma.f64(double, double, double) #1
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #1
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #1