; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-pc-windows -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA

; Codegen test for the llvm.x86.fma.* intrinsics. Every function forwards its
; three vector arguments to a single intrinsic call and returns the result;
; the expectations pin the exact instruction sequence expected from llc.
;
; Expectation groups, by run configuration above:
;   * +fma (or bdver2 with fma4 disabled) on the unknown-unknown triple:
;     three-operand vf*213* forms operating on registers.
;   * +fma on the x86_64-pc-windows triple: the operands are reached through
;     memory addressed by rcx, rdx and r8.
;   * +fma4 (or bdver2 with fma disabled): four-operand vf* forms.
;
; The *_bac_* variants pass the first two arguments in swapped order
; (%a1, %a0, %a2).
;
; The target architecture is taken from the triple on every run line, so no
; separate architecture option is passed.

; VFMADD
define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUB
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0
;
; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
;
; CHECK-FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
;
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-NEXT: # BB#0:
;
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0
;
; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
;
; CHECK-FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
;
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Each test below hands its three vector arguments to one intrinsic call and
; returns the result. The bracketed markers name the run configuration that a
; group of expectations belongs to.
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
; [fma4]
; CHECK-FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %r
}
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; ---- VFNMADD: ss, sd (each with an operand-swapped _bac variant), ps, pd, ps.256, pd.256 ----
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %r
}

; First two operands swapped: the call passes (%a1, %a0, %a2).
define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm0, %xmm1, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %r
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %r
}

; First two operands swapped: the call passes (%a1, %a0, %a2).
define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %r
}
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %r
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %r
}
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %r
}
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %r
}
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; ---- VFNMSUB: ss, sd (each with an operand-swapped _bac variant), ps, pd, ps.256, pd.256 ----
define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %r
}

; First two operands swapped: the call passes (%a1, %a0, %a2).
define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm0, %xmm1, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %r
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %r
}

; First two operands swapped: the call passes (%a1, %a0, %a2).
define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %r
}
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %r
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %r
}
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %r
}
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; [fma4]
; CHECK-FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %r
}
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; ---- VFMADDSUB: ps, pd, ps.256, pd.256 ----
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %r
}
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %r
}
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
; [fma4]
; CHECK-FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %r
}
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
; [fma4]
; CHECK-FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %r
}
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; ---- VFMSUBADD: ps, pd, ps.256, pd.256 ----
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %r
}
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0
; [fma4]
; CHECK-FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %r
}
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0
; [fma4]
; CHECK-FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %r
}
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK-NEXT: # BB#0:
; [fma, windows triple]
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0
; [fma, unknown triple]
; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0
; [fma4]
; CHECK-FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
; [all runs]
; CHECK-NEXT: retq
  %r = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %r
}
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; Attribute group shared by every test function in this file.
attributes #0 = { nounwind }