; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=FMA4

; Operand-commutation tests for x86 FMA intrinsics, compiled with -mcpu=bdver2.
;
; Every function takes two vectors, %a and %b, and passes them to one intrinsic
; with a repeated operand. The infix in the function name gives the operand
; order used in the call:
;   baa = (%b, %a, %a)    aba = (%a, %b, %a)    bba = (%b, %b, %a)
; In each case the assertions expect exactly one load into xmm0/ymm0 followed by
; a single four-operand FMA instruction that takes its remaining input straight
; from memory, and then the return.
;
; NOTE(review): the (%rcx)/(%rdx) memory operands presumably come from the
; win32 triple passing vector arguments indirectly -- confirm against the ABI.
;
; Only one check prefix is handed to FileCheck because it is the only one the
; assertions in this file use; a prefix without any matching directive makes
; FileCheck report an error unless unused prefixes are explicitly allowed.
; Regenerate the assertions with the script named on the first line rather
; than editing them by hand.

attributes #0 = { nounwind }

; vfmadd.ss (fma4 intrinsic) on <4 x float>; the expected load fills element 0
; and zeroes the remaining elements.
declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_baa_ss:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA4-NEXT:    vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_aba_ss:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA4-NEXT:    vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_bba_ss:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA4-NEXT:    vfmaddss (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfmadd.ps on <4 x float>.
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_baa_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %xmm0
; FMA4-NEXT:    vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_aba_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %xmm0
; FMA4-NEXT:    vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_bba_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rdx), %xmm0
; FMA4-NEXT:    vfmaddps (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfmadd.ps.256 on <8 x float>; the expected code uses ymm0.
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_baa_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %ymm0
; FMA4-NEXT:    vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_aba_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %ymm0
; FMA4-NEXT:    vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_bba_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rdx), %ymm0
; FMA4-NEXT:    vfmaddps (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; vfmadd.sd (fma4 intrinsic) on <2 x double>; the expected load fills element 0
; and zeroes the other element.
declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_baa_sd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA4-NEXT:    vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_aba_sd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA4-NEXT:    vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_bba_sd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA4-NEXT:    vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfmadd.pd on <2 x double>.
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_baa_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %xmm0
; FMA4-NEXT:    vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_aba_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %xmm0
; FMA4-NEXT:    vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_bba_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rdx), %xmm0
; FMA4-NEXT:    vfmaddpd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfmadd.pd.256 on <4 x double>; the expected code uses ymm0.
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_baa_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %ymm0
; FMA4-NEXT:    vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_aba_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %ymm0
; FMA4-NEXT:    vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_bba_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rdx), %ymm0
; FMA4-NEXT:    vfmaddpd (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; vfnmadd.ps on <4 x float>.
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_baa_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %xmm0
; FMA4-NEXT:    vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_aba_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %xmm0
; FMA4-NEXT:    vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_bba_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rdx), %xmm0
; FMA4-NEXT:    vfnmaddps (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfnmadd.ps.256 on <8 x float>; the expected code uses ymm0.
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_baa_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %ymm0
; FMA4-NEXT:    vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_aba_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %ymm0
; FMA4-NEXT:    vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_bba_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rdx), %ymm0
; FMA4-NEXT:    vfnmaddps (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; vfnmadd.pd on <2 x double>.
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_baa_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %xmm0
; FMA4-NEXT:    vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_aba_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %xmm0
; FMA4-NEXT:    vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_bba_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rdx), %xmm0
; FMA4-NEXT:    vfnmaddpd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfnmadd.pd.256 on <4 x double>; the expected code uses ymm0.
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_baa_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_aba_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_bba_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rdx), %ymm0
; FMA4-NEXT:    vfnmaddpd (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; vfmsub.ps on <4 x float>.
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_baa_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %xmm0
; FMA4-NEXT:    vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_aba_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %xmm0
; FMA4-NEXT:    vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_bba_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rdx), %xmm0
; FMA4-NEXT:    vfmsubps (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfmsub.ps.256 on <8 x float>; the expected code uses ymm0.
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_baa_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %ymm0
; FMA4-NEXT:    vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_aba_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %ymm0
; FMA4-NEXT:    vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_bba_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rdx), %ymm0
; FMA4-NEXT:    vfmsubps (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; vfmsub.pd on <2 x double>.
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_baa_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %xmm0
; FMA4-NEXT:    vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_aba_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %xmm0
; FMA4-NEXT:    vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_bba_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rdx), %xmm0
; FMA4-NEXT:    vfmsubpd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfmsub.pd.256 on <4 x double>; the expected code uses ymm0.
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_baa_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %ymm0
; FMA4-NEXT:    vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_aba_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %ymm0
; FMA4-NEXT:    vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_bba_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rdx), %ymm0
; FMA4-NEXT:    vfmsubpd (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; vfnmsub.ps on <4 x float>.
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_baa_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %xmm0
; FMA4-NEXT:    vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_aba_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %xmm0
; FMA4-NEXT:    vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_bba_ps:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rdx), %xmm0
; FMA4-NEXT:    vfnmsubps (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfnmsub.ps.256 on <8 x float>; the expected code uses ymm0.
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_baa_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %ymm0
; FMA4-NEXT:    vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_aba_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rcx), %ymm0
; FMA4-NEXT:    vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_bba_ps_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovaps (%rdx), %ymm0
; FMA4-NEXT:    vfnmsubps (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; vfnmsub.pd on <2 x double>.
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_baa_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %xmm0
; FMA4-NEXT:    vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_aba_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %xmm0
; FMA4-NEXT:    vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_bba_pd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rdx), %xmm0
; FMA4-NEXT:    vfnmsubpd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfnmsub.pd.256 on <4 x double>; the expected code uses ymm0.
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_baa_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_aba_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rcx), %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_bba_pd_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmovapd (%rdx), %ymm0
; FMA4-NEXT:    vfnmsubpd (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}