; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s

; Scalar FMA intrinsic tests in which one loaded value is used for two of the
; three intrinsic operands and the other value is loaded from a second pointer.
; The FileCheck patterns expect the second load to appear as the memory operand
; of the FMA instruction itself, with one XMM register used as both register
; operands. With the win32 triple used above, the pointer arguments %a and %b
; are matched as (%rcx) and (%rdx).

attributes #0 = { nounwind }

; Single-precision scalar FMA intrinsics.
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

; Double-precision scalar FMA intrinsics.
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)

; vfmadd.ss with operands (a, a, b): the 213 form is expected, reading b from memory.
; CHECK-LABEL: fmadd_aab_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fmadd_aab_ss(float* %a, float* %b) #0 {
  ; Vector <a, 0, 0, 0>.
  %a.ld = load float, float* %a
  %a.lane0 = insertelement <4 x float> undef, float %a.ld, i32 0
  %a.lane1 = insertelement <4 x float> %a.lane0, float 0.0, i32 1
  %a.lane2 = insertelement <4 x float> %a.lane1, float 0.0, i32 2
  %a.vec = insertelement <4 x float> %a.lane2, float 0.0, i32 3

  ; Vector <b, 0, 0, 0>.
  %b.ld = load float, float* %b
  %b.lane0 = insertelement <4 x float> undef, float %b.ld, i32 0
  %b.lane1 = insertelement <4 x float> %b.lane0, float 0.0, i32 1
  %b.lane2 = insertelement <4 x float> %b.lane1, float 0.0, i32 2
  %b.vec = insertelement <4 x float> %b.lane2, float 0.0, i32 3

  %fma = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a.vec, <4 x float> %a.vec, <4 x float> %b.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <4 x float> %fma, i32 0
  store float %lo, float* %a
  ret void
}

; vfmadd.ss with operands (a, b, a): the 132 form is expected, reading b from memory.
; CHECK-LABEL: fmadd_aba_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fmadd_aba_ss(float* %a, float* %b) #0 {
  ; Vector <a, 0, 0, 0>.
  %a.ld = load float, float* %a
  %a.lane0 = insertelement <4 x float> undef, float %a.ld, i32 0
  %a.lane1 = insertelement <4 x float> %a.lane0, float 0.0, i32 1
  %a.lane2 = insertelement <4 x float> %a.lane1, float 0.0, i32 2
  %a.vec = insertelement <4 x float> %a.lane2, float 0.0, i32 3

  ; Vector <b, 0, 0, 0>.
  %b.ld = load float, float* %b
  %b.lane0 = insertelement <4 x float> undef, float %b.ld, i32 0
  %b.lane1 = insertelement <4 x float> %b.lane0, float 0.0, i32 1
  %b.lane2 = insertelement <4 x float> %b.lane1, float 0.0, i32 2
  %b.vec = insertelement <4 x float> %b.lane2, float 0.0, i32 3

  %fma = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a.vec, <4 x float> %b.vec, <4 x float> %a.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <4 x float> %fma, i32 0
  store float %lo, float* %a
  ret void
}

; vfmsub.ss with operands (a, a, b): the 213 form is expected, reading b from memory.
; CHECK-LABEL: fmsub_aab_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fmsub_aab_ss(float* %a, float* %b) #0 {
  ; Vector <a, 0, 0, 0>.
  %a.ld = load float, float* %a
  %a.lane0 = insertelement <4 x float> undef, float %a.ld, i32 0
  %a.lane1 = insertelement <4 x float> %a.lane0, float 0.0, i32 1
  %a.lane2 = insertelement <4 x float> %a.lane1, float 0.0, i32 2
  %a.vec = insertelement <4 x float> %a.lane2, float 0.0, i32 3

  ; Vector <b, 0, 0, 0>.
  %b.ld = load float, float* %b
  %b.lane0 = insertelement <4 x float> undef, float %b.ld, i32 0
  %b.lane1 = insertelement <4 x float> %b.lane0, float 0.0, i32 1
  %b.lane2 = insertelement <4 x float> %b.lane1, float 0.0, i32 2
  %b.vec = insertelement <4 x float> %b.lane2, float 0.0, i32 3

  %fma = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a.vec, <4 x float> %a.vec, <4 x float> %b.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <4 x float> %fma, i32 0
  store float %lo, float* %a
  ret void
}

; vfmsub.ss with operands (a, b, a): the 132 form is expected, reading b from memory.
; CHECK-LABEL: fmsub_aba_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fmsub_aba_ss(float* %a, float* %b) #0 {
  ; Vector <a, 0, 0, 0>.
  %a.ld = load float, float* %a
  %a.lane0 = insertelement <4 x float> undef, float %a.ld, i32 0
  %a.lane1 = insertelement <4 x float> %a.lane0, float 0.0, i32 1
  %a.lane2 = insertelement <4 x float> %a.lane1, float 0.0, i32 2
  %a.vec = insertelement <4 x float> %a.lane2, float 0.0, i32 3

  ; Vector <b, 0, 0, 0>.
  %b.ld = load float, float* %b
  %b.lane0 = insertelement <4 x float> undef, float %b.ld, i32 0
  %b.lane1 = insertelement <4 x float> %b.lane0, float 0.0, i32 1
  %b.lane2 = insertelement <4 x float> %b.lane1, float 0.0, i32 2
  %b.vec = insertelement <4 x float> %b.lane2, float 0.0, i32 3

  %fma = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a.vec, <4 x float> %b.vec, <4 x float> %a.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <4 x float> %fma, i32 0
  store float %lo, float* %a
  ret void
}

; vfnmadd.ss with operands (a, a, b): the 213 form is expected, reading b from memory.
; CHECK-LABEL: fnmadd_aab_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fnmadd_aab_ss(float* %a, float* %b) #0 {
  ; Vector <a, 0, 0, 0>.
  %a.ld = load float, float* %a
  %a.lane0 = insertelement <4 x float> undef, float %a.ld, i32 0
  %a.lane1 = insertelement <4 x float> %a.lane0, float 0.0, i32 1
  %a.lane2 = insertelement <4 x float> %a.lane1, float 0.0, i32 2
  %a.vec = insertelement <4 x float> %a.lane2, float 0.0, i32 3

  ; Vector <b, 0, 0, 0>.
  %b.ld = load float, float* %b
  %b.lane0 = insertelement <4 x float> undef, float %b.ld, i32 0
  %b.lane1 = insertelement <4 x float> %b.lane0, float 0.0, i32 1
  %b.lane2 = insertelement <4 x float> %b.lane1, float 0.0, i32 2
  %b.vec = insertelement <4 x float> %b.lane2, float 0.0, i32 3

  %fma = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a.vec, <4 x float> %a.vec, <4 x float> %b.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <4 x float> %fma, i32 0
  store float %lo, float* %a
  ret void
}

; vfnmadd.ss with operands (a, b, a): the 132 form is expected, reading b from memory.
; CHECK-LABEL: fnmadd_aba_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fnmadd_aba_ss(float* %a, float* %b) #0 {
  ; Vector <a, 0, 0, 0>.
  %a.ld = load float, float* %a
  %a.lane0 = insertelement <4 x float> undef, float %a.ld, i32 0
  %a.lane1 = insertelement <4 x float> %a.lane0, float 0.0, i32 1
  %a.lane2 = insertelement <4 x float> %a.lane1, float 0.0, i32 2
  %a.vec = insertelement <4 x float> %a.lane2, float 0.0, i32 3

  ; Vector <b, 0, 0, 0>.
  %b.ld = load float, float* %b
  %b.lane0 = insertelement <4 x float> undef, float %b.ld, i32 0
  %b.lane1 = insertelement <4 x float> %b.lane0, float 0.0, i32 1
  %b.lane2 = insertelement <4 x float> %b.lane1, float 0.0, i32 2
  %b.vec = insertelement <4 x float> %b.lane2, float 0.0, i32 3

  %fma = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a.vec, <4 x float> %b.vec, <4 x float> %a.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <4 x float> %fma, i32 0
  store float %lo, float* %a
  ret void
}

; vfnmsub.ss with operands (a, a, b): the 213 form is expected, reading b from memory.
; CHECK-LABEL: fnmsub_aab_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fnmsub_aab_ss(float* %a, float* %b) #0 {
  ; Vector <a, 0, 0, 0>.
  %a.ld = load float, float* %a
  %a.lane0 = insertelement <4 x float> undef, float %a.ld, i32 0
  %a.lane1 = insertelement <4 x float> %a.lane0, float 0.0, i32 1
  %a.lane2 = insertelement <4 x float> %a.lane1, float 0.0, i32 2
  %a.vec = insertelement <4 x float> %a.lane2, float 0.0, i32 3

  ; Vector <b, 0, 0, 0>.
  %b.ld = load float, float* %b
  %b.lane0 = insertelement <4 x float> undef, float %b.ld, i32 0
  %b.lane1 = insertelement <4 x float> %b.lane0, float 0.0, i32 1
  %b.lane2 = insertelement <4 x float> %b.lane1, float 0.0, i32 2
  %b.vec = insertelement <4 x float> %b.lane2, float 0.0, i32 3

  %fma = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a.vec, <4 x float> %a.vec, <4 x float> %b.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <4 x float> %fma, i32 0
  store float %lo, float* %a
  ret void
}

; vfnmsub.ss with operands (a, b, a): the 132 form is expected, reading b from memory.
; CHECK-LABEL: fnmsub_aba_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fnmsub_aba_ss(float* %a, float* %b) #0 {
  ; Vector <a, 0, 0, 0>.
  %a.ld = load float, float* %a
  %a.lane0 = insertelement <4 x float> undef, float %a.ld, i32 0
  %a.lane1 = insertelement <4 x float> %a.lane0, float 0.0, i32 1
  %a.lane2 = insertelement <4 x float> %a.lane1, float 0.0, i32 2
  %a.vec = insertelement <4 x float> %a.lane2, float 0.0, i32 3

  ; Vector <b, 0, 0, 0>.
  %b.ld = load float, float* %b
  %b.lane0 = insertelement <4 x float> undef, float %b.ld, i32 0
  %b.lane1 = insertelement <4 x float> %b.lane0, float 0.0, i32 1
  %b.lane2 = insertelement <4 x float> %b.lane1, float 0.0, i32 2
  %b.vec = insertelement <4 x float> %b.lane2, float 0.0, i32 3

  %fma = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a.vec, <4 x float> %b.vec, <4 x float> %a.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <4 x float> %fma, i32 0
  store float %lo, float* %a
  ret void
}

; vfmadd.sd with operands (a, a, b): the 213 form is expected, reading b from memory.
; CHECK-LABEL: fmadd_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fmadd_aab_sd(double* %a, double* %b) #0 {
  ; Vector <a, 0>.
  %a.ld = load double, double* %a
  %a.lane0 = insertelement <2 x double> undef, double %a.ld, i32 0
  %a.vec = insertelement <2 x double> %a.lane0, double 0.0, i32 1

  ; Vector <b, 0>.
  %b.ld = load double, double* %b
  %b.lane0 = insertelement <2 x double> undef, double %b.ld, i32 0
  %b.vec = insertelement <2 x double> %b.lane0, double 0.0, i32 1

  %fma = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a.vec, <2 x double> %a.vec, <2 x double> %b.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <2 x double> %fma, i32 0
  store double %lo, double* %a
  ret void
}

; vfmadd.sd with operands (a, b, a): the 132 form is expected, reading b from memory.
; CHECK-LABEL: fmadd_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fmadd_aba_sd(double* %a, double* %b) #0 {
  ; Vector <a, 0>.
  %a.ld = load double, double* %a
  %a.lane0 = insertelement <2 x double> undef, double %a.ld, i32 0
  %a.vec = insertelement <2 x double> %a.lane0, double 0.0, i32 1

  ; Vector <b, 0>.
  %b.ld = load double, double* %b
  %b.lane0 = insertelement <2 x double> undef, double %b.ld, i32 0
  %b.vec = insertelement <2 x double> %b.lane0, double 0.0, i32 1

  %fma = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a.vec, <2 x double> %b.vec, <2 x double> %a.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <2 x double> %fma, i32 0
  store double %lo, double* %a
  ret void
}

; vfmsub.sd with operands (a, a, b): the 213 form is expected, reading b from memory.
; CHECK-LABEL: fmsub_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fmsub_aab_sd(double* %a, double* %b) #0 {
  ; Vector <a, 0>.
  %a.ld = load double, double* %a
  %a.lane0 = insertelement <2 x double> undef, double %a.ld, i32 0
  %a.vec = insertelement <2 x double> %a.lane0, double 0.0, i32 1

  ; Vector <b, 0>.
  %b.ld = load double, double* %b
  %b.lane0 = insertelement <2 x double> undef, double %b.ld, i32 0
  %b.vec = insertelement <2 x double> %b.lane0, double 0.0, i32 1

  %fma = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a.vec, <2 x double> %a.vec, <2 x double> %b.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <2 x double> %fma, i32 0
  store double %lo, double* %a
  ret void
}

; vfmsub.sd with operands (a, b, a): the 132 form is expected, reading b from memory.
; CHECK-LABEL: fmsub_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fmsub_aba_sd(double* %a, double* %b) #0 {
  ; Vector <a, 0>.
  %a.ld = load double, double* %a
  %a.lane0 = insertelement <2 x double> undef, double %a.ld, i32 0
  %a.vec = insertelement <2 x double> %a.lane0, double 0.0, i32 1

  ; Vector <b, 0>.
  %b.ld = load double, double* %b
  %b.lane0 = insertelement <2 x double> undef, double %b.ld, i32 0
  %b.vec = insertelement <2 x double> %b.lane0, double 0.0, i32 1

  %fma = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a.vec, <2 x double> %b.vec, <2 x double> %a.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <2 x double> %fma, i32 0
  store double %lo, double* %a
  ret void
}

; vfnmadd.sd with operands (a, a, b): the 213 form is expected, reading b from memory.
; CHECK-LABEL: fnmadd_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fnmadd_aab_sd(double* %a, double* %b) #0 {
  ; Vector <a, 0>.
  %a.ld = load double, double* %a
  %a.lane0 = insertelement <2 x double> undef, double %a.ld, i32 0
  %a.vec = insertelement <2 x double> %a.lane0, double 0.0, i32 1

  ; Vector <b, 0>.
  %b.ld = load double, double* %b
  %b.lane0 = insertelement <2 x double> undef, double %b.ld, i32 0
  %b.vec = insertelement <2 x double> %b.lane0, double 0.0, i32 1

  %fma = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a.vec, <2 x double> %a.vec, <2 x double> %b.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <2 x double> %fma, i32 0
  store double %lo, double* %a
  ret void
}

; vfnmadd.sd with operands (a, b, a): the 132 form is expected, reading b from memory.
; CHECK-LABEL: fnmadd_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fnmadd_aba_sd(double* %a, double* %b) #0 {
  ; Vector <a, 0>.
  %a.ld = load double, double* %a
  %a.lane0 = insertelement <2 x double> undef, double %a.ld, i32 0
  %a.vec = insertelement <2 x double> %a.lane0, double 0.0, i32 1

  ; Vector <b, 0>.
  %b.ld = load double, double* %b
  %b.lane0 = insertelement <2 x double> undef, double %b.ld, i32 0
  %b.vec = insertelement <2 x double> %b.lane0, double 0.0, i32 1

  %fma = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a.vec, <2 x double> %b.vec, <2 x double> %a.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <2 x double> %fma, i32 0
  store double %lo, double* %a
  ret void
}

; vfnmsub.sd with operands (a, a, b): the 213 form is expected, reading b from memory.
; CHECK-LABEL: fnmsub_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fnmsub_aab_sd(double* %a, double* %b) #0 {
  ; Vector <a, 0>.
  %a.ld = load double, double* %a
  %a.lane0 = insertelement <2 x double> undef, double %a.ld, i32 0
  %a.vec = insertelement <2 x double> %a.lane0, double 0.0, i32 1

  ; Vector <b, 0>.
  %b.ld = load double, double* %b
  %b.lane0 = insertelement <2 x double> undef, double %b.ld, i32 0
  %b.vec = insertelement <2 x double> %b.lane0, double 0.0, i32 1

  %fma = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a.vec, <2 x double> %a.vec, <2 x double> %b.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <2 x double> %fma, i32 0
  store double %lo, double* %a
  ret void
}

; vfnmsub.sd with operands (a, b, a): the 132 form is expected, reading b from memory.
; CHECK-LABEL: fnmsub_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
define void @fnmsub_aba_sd(double* %a, double* %b) #0 {
  ; Vector <a, 0>.
  %a.ld = load double, double* %a
  %a.lane0 = insertelement <2 x double> undef, double %a.ld, i32 0
  %a.vec = insertelement <2 x double> %a.lane0, double 0.0, i32 1

  ; Vector <b, 0>.
  %b.ld = load double, double* %b
  %b.lane0 = insertelement <2 x double> undef, double %b.ld, i32 0
  %b.vec = insertelement <2 x double> %b.lane0, double 0.0, i32 1

  %fma = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a.vec, <2 x double> %b.vec, <2 x double> %a.vec)

  ; Only element 0 is written back through %a.
  %lo = extractelement <2 x double> %fma, i32 0
  store double %lo, double* %a
  ret void
}