; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s

; Codegen test for x86 FMA intrinsics whose call sites repeat an operand.
;
; Every function takes two vector arguments, %a and %b, and feeds them to a
; single intrinsic in one of three orders, encoded in the function name:
;   baa -> (%b, %a, %a)
;   aba -> (%a, %b, %a)
;   bba -> (%b, %b, %a)
;
; The name suffix selects the intrinsic variant: ss/sd are the scalar forms,
; ps/pd the 128-bit packed forms, and ps_y/pd_y the 256-bit packed forms.
;
; The expected assembly of each function is pinned instruction by instruction,
; including which of the 132/213/231 instruction forms is selected and which
; operand is folded into the FMA as a memory reference.
;
; NOTE(review): the (%rcx)/(%rdx) loads presumably come from the win32 triple
; passing vector arguments by reference - confirm against the ABI documentation.

attributes #0 = { nounwind }

; ---------------------------------------------------------------------------
; vfmadd
; ---------------------------------------------------------------------------

; vfmadd, scalar single precision.
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vfmadd213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfmadd132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-NEXT:    vfmadd213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfmadd, 128-bit packed single precision.
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfmadd132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfmadd231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-NEXT:    vfmadd213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfmadd, 256-bit packed single precision.
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %ymm0
; CHECK-NEXT:    vfmadd132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %ymm0
; CHECK-NEXT:    vfmadd231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %ymm0
; CHECK-NEXT:    vfmadd213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; vfmadd, scalar double precision.
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vfmadd213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfmadd132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %xmm0
; CHECK-NEXT:    vfmadd213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfmadd, 128-bit packed double precision.
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfmadd132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfmadd231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %xmm0
; CHECK-NEXT:    vfmadd213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfmadd, 256-bit packed double precision.
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %ymm0
; CHECK-NEXT:    vfmadd132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %ymm0
; CHECK-NEXT:    vfmadd231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmadd_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %ymm0
; CHECK-NEXT:    vfmadd213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}


; ---------------------------------------------------------------------------
; vfnmadd
; ---------------------------------------------------------------------------

; vfnmadd, scalar single precision.
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vfnmadd213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfnmadd132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-NEXT:    vfnmadd213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfnmadd, 128-bit packed single precision.
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfnmadd132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfnmadd231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-NEXT:    vfnmadd213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfnmadd, 256-bit packed single precision.
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %ymm0
; CHECK-NEXT:    vfnmadd132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %ymm0
; CHECK-NEXT:    vfnmadd231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %ymm0
; CHECK-NEXT:    vfnmadd213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; vfnmadd, scalar double precision.
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vfnmadd213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfnmadd132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %xmm0
; CHECK-NEXT:    vfnmadd213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfnmadd, 128-bit packed double precision.
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfnmadd132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfnmadd231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %xmm0
; CHECK-NEXT:    vfnmadd213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfnmadd, 256-bit packed double precision.
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %ymm0
; CHECK-NEXT:    vfnmadd132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %ymm0
; CHECK-NEXT:    vfnmadd231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmadd_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %ymm0
; CHECK-NEXT:    vfnmadd213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}


; ---------------------------------------------------------------------------
; vfmsub
; ---------------------------------------------------------------------------

; vfmsub, scalar single precision.
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vfmsub213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfmsub132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-NEXT:    vfmsub213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfmsub, 128-bit packed single precision.
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfmsub132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfmsub231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-NEXT:    vfmsub213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfmsub, 256-bit packed single precision.
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %ymm0
; CHECK-NEXT:    vfmsub132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %ymm0
; CHECK-NEXT:    vfmsub231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %ymm0
; CHECK-NEXT:    vfmsub213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; vfmsub, scalar double precision.
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vfmsub213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfmsub132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %xmm0
; CHECK-NEXT:    vfmsub213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfmsub, 128-bit packed double precision.
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfmsub132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfmsub231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %xmm0
; CHECK-NEXT:    vfmsub213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfmsub, 256-bit packed double precision.
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %ymm0
; CHECK-NEXT:    vfmsub132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %ymm0
; CHECK-NEXT:    vfmsub231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fmsub_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %ymm0
; CHECK-NEXT:    vfmsub213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}


; ---------------------------------------------------------------------------
; vfnmsub
; ---------------------------------------------------------------------------

; vfnmsub, scalar single precision.
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vfnmsub213ss %xmm1, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfnmsub132ss (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-NEXT:    vfnmsub213ss (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfnmsub, 128-bit packed single precision.
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfnmsub132ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %xmm0
; CHECK-NEXT:    vfnmsub231ps (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-NEXT:    vfnmsub213ps (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; vfnmsub, 256-bit packed single precision.
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %ymm0
; CHECK-NEXT:    vfnmsub132ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rcx), %ymm0
; CHECK-NEXT:    vfnmsub231ps (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_ps_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps (%rdx), %ymm0
; CHECK-NEXT:    vfnmsub213ps (%rcx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; vfnmsub, scalar double precision.
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
; CHECK-NEXT:    vfnmsub213sd %xmm1, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfnmsub132sd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %xmm0
; CHECK-NEXT:    vfnmsub213sd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfnmsub, 128-bit packed double precision.
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfnmsub132pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %xmm0
; CHECK-NEXT:    vfnmsub231pd (%rdx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %xmm0
; CHECK-NEXT:    vfnmsub213pd (%rcx), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; vfnmsub, 256-bit packed double precision.
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_baa_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %ymm0
; CHECK-NEXT:    vfnmsub132pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_aba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rcx), %ymm0
; CHECK-NEXT:    vfnmsub231pd (%rdx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_x86_fnmsub_bba_pd_y:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovapd (%rdx), %ymm0
; CHECK-NEXT:    vfnmsub213pd (%rcx), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}