; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s --check-prefix=FMA
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s --check-prefix=FMA

; Exercises instruction selection for the x86 fused multiply-add intrinsics
; when one value is used for more than one of the three operands.
;
; Naming: the three letters before the type suffix spell the operand order
; handed to the intrinsic, e.g. "baa" = (%b, %a, %a), "aba" = (%a, %b, %a),
; "bba" = (%b, %b, %a). A trailing "_y" marks the 256-bit (ymm) variant.
;
; In the expected assembly %a is read through (%rcx) and %b through (%rdx).
; The 132/213/231 digit suffix on each mnemonic is the operand form that the
; checks pin down for that operand order.

attributes #0 = { nounwind }

; ---------------------------------------------------------------------------
; vfmadd: (x * y) + z
; ---------------------------------------------------------------------------

; Scalar single precision.
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmadd132ss (%rdx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmadd213ss (%rcx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 128-bit.
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 256-bit.
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Scalar double precision.
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmadd132sd (%rdx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmadd213sd (%rcx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 128-bit.
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 256-bit.
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}


; ---------------------------------------------------------------------------
; vfnmadd: -(x * y) + z
; ---------------------------------------------------------------------------

; Scalar single precision.
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132ss (%rdx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213ss (%rcx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 128-bit.
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 256-bit.
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Scalar double precision.
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132sd (%rdx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213sd (%rcx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 128-bit.
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 256-bit.
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; Remaining 256-bit packed double-precision vfnmadd cases. The test-name
; letters (aba, bba, baa) spell the operand order handed to the intrinsic;
; the intrinsic itself is declared earlier in the file.
define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; ---------------------------------------------------------------------------
; vfmsub: (x * y) - z
; ---------------------------------------------------------------------------

; Scalar single precision.
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmsub132ss (%rdx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmsub213ss (%rcx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 128-bit.
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 256-bit.
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Scalar double precision.
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmsub132sd (%rdx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmsub213sd (%rcx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 128-bit.
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 256-bit.
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}


; ---------------------------------------------------------------------------
; vfnmsub: -(x * y) - z
; ---------------------------------------------------------------------------

; Scalar single precision.
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132ss (%rdx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213ss (%rcx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 128-bit.
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 256-bit.
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Scalar double precision.
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132sd (%rdx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213sd (%rcx), %xmm0, %xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 128-bit.
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 256-bit.
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
