; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512VL
; RUN: llc < %s -mtriple=x86_64-pc-windows -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN

; The three run configurations cover: plain VEX-encoded FMA3, AVX512VL (where
; the assertions also pin the "EVEX TO VEX Compression" re-encoding), and the
; Windows x64 target, where the vector arguments are read through %rcx, %rdx
; and %r8, so those assertions exercise the memory-folded FMA forms.
;
; Scalar (ss/sd) tests extract element 0 from each source, call llvm.fma, and
; insert the result back into one source vector, so only lane 0 is computed.
; The "bac" variants swap the first two multiplicands (extract from %a1 then
; %a0) and merge the result into %a1 instead of %a0.

; VFMADD
define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a0, i64 0
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = insertelement <4 x float> %a0, float %4, i64 0
  ret <4 x float> %5
}

define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xa9,0xca]
; CHECK-FMA-NEXT:    # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a1, i64 0
  %2 = extractelement <4 x float> %a0, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = insertelement <4 x float> %a1, float %4, i64 0
  ret <4 x float> %5
}

; Result merged into %a2, so codegen can pick the 231 form (accumulate in place).
define <4 x float> @test_x86_fma_vfmadd_ss_231(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss_231:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm2 # encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
; CHECK-FMA-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss_231:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
; CHECK-AVX512VL-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss_231:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%r8), %xmm0 # encoding: [0xc4,0xc1,0x78,0x28,0x00]
; CHECK-FMA-WIN-NEXT:    vmovss (%rcx), %xmm1 # encoding: [0xc5,0xfa,0x10,0x09]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmadd231ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xb9,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * mem) + xmm0
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a0, i64 0
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = insertelement <4 x float> %a2, float %4, i64 0
  ret <4 x float> %5
}

define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a0, i64 0
  %2 = extractelement <2 x double> %a1, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
  %5 = insertelement <2 x double> %a0, double %4, i64 0
  ret <2 x double> %5
}

define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
; CHECK-FMA-NEXT:    # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a1, i64 0
  %2 = extractelement <2 x double> %a0, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
  %5 = insertelement <2 x double> %a1, double %4, i64 0
  ret <2 x double> %5
}

; Packed tests call the vector llvm.fma intrinsics directly.
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %1
}

define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %1
}

define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %1
}

define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %1
}

; VFMSUB
; fsub from -0.0 negates the addend, which should select the vfmsub forms.
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a0, i64 0
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = fsub float -0.000000e+00, %3
  %5 = call float @llvm.fma.f32(float %1, float %2, float %4)
  %6 = insertelement <4 x float> %a0, float %5, i64 0
  ret <4 x float> %6
}

define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xab,0xca]
; CHECK-FMA-NEXT:    # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a1, i64 0
  %2 = extractelement <4 x float> %a0, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = fsub float -0.000000e+00, %3
  %5 = call float @llvm.fma.f32(float %1, float %2, float %4)
  %6 = insertelement <4 x float> %a1, float %5, i64 0
  ret <4 x float> %6
}

define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a0, i64 0
  %2 = extractelement <2 x double> %a1, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = fsub double -0.000000e+00, %3
  %5 = call double @llvm.fma.f64(double %1, double %2, double %4)
  %6 = insertelement <2 x double> %a0, double %5, i64 0
  ret <2 x double> %6
}

define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xab,0xca]
; CHECK-FMA-NEXT:    # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a1, i64 0
  %2 = extractelement <2 x double> %a0, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = fsub double -0.000000e+00, %3
  %5 = call double @llvm.fma.f64(double %1, double %2, double %4)
  %6 = insertelement <2 x double> %a1, double %5, i64 0
  ret <2 x double> %6
}

define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
  ret <4 x float> %2
}

define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
  ret <2 x double> %2
}

define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
  ret <8 x float> %2
}

define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
  ret <4 x double> %2
}

; VFNMADD
; Here the fsub from -0.0 negates a multiplicand instead of the addend, which
; should select the vfnmadd forms.
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a0, i64 0
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = fsub float -0.000000e+00, %2
  %5 = call float @llvm.fma.f32(float %1, float %4, float %3)
  %6 = insertelement <4 x float> %a0, float %5, i64 0
  ret <4 x float> %6
}

define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xad,0xca]
; CHECK-FMA-NEXT:    # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a1, i64 0
  %2 = extractelement <4 x float> %a0, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = fsub float -0.000000e+00, %2
  %5 = call float @llvm.fma.f32(float %1, float %4, float %3)
  %6 = insertelement <4 x float> %a1, float %5, i64 0
  ret <4 x float> %6
}

define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a0, i64 0
  %2 = extractelement <2 x double> %a1, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = fsub double -0.000000e+00, %2
  %5 = call double @llvm.fma.f64(double %1, double %4, double %3)
  %6 = insertelement <2 x double> %a0, double %5, i64 0
  ret <2 x double> %6
}

define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xad,0xca]
; CHECK-FMA-NEXT:    # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a1, i64 0
  %2 = extractelement <2 x double> %a0, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = fsub double -0.000000e+00, %2
  %5 = call double @llvm.fma.f64(double %1, double %4, double %3)
  %6 = insertelement <2 x double> %a1, double %5, i64 0
  ret <2 x double> %6
}

define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %2
}

define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %2
}

define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xac,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = -(ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %2
}

define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = -(ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double
-0.000000e+00, double -0.000000e+00>, %a0 700 %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2) 701 ret <4 x double> %2 702 } 703 704 ; VFNMSUB 705 define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 706 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ss: 707 ; CHECK-FMA: # %bb.0: 708 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2] 709 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 710 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 711 ; 712 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ss: 713 ; CHECK-AVX512VL: # %bb.0: 714 ; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2] 715 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 716 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 717 ; 718 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss: 719 ; CHECK-FMA-WIN: # %bb.0: 720 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] 721 ; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] 722 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero 723 ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x02] 724 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 725 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 726 %1 = extractelement <4 x float> %a0, i64 0 727 %2 = extractelement <4 x float> %a1, i64 0 728 %3 = extractelement <4 x float> %a2, i64 0 729 %4 = fsub float -0.000000e+00, %2 730 %5 = fsub float -0.000000e+00, %3 731 %6 = call float @llvm.fma.f32(float %1, float %4, float %5) 732 %7 = insertelement <4 x float> %a0, float %6, i64 0 733 ret <4 x float> %7 734 } 735 736 define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 737 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_ss: 738 ; CHECK-FMA: # %bb.0: 739 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, 
%xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xaf,0xca] 740 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 741 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1] 742 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 743 ; 744 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss: 745 ; CHECK-AVX512VL: # %bb.0: 746 ; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0xca] 747 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 748 ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 749 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 750 ; 751 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_ss: 752 ; CHECK-FMA-WIN: # %bb.0: 753 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 754 ; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] 755 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero 756 ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x01] 757 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 758 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 759 %1 = extractelement <4 x float> %a1, i64 0 760 %2 = extractelement <4 x float> %a0, i64 0 761 %3 = extractelement <4 x float> %a2, i64 0 762 %4 = fsub float -0.000000e+00, %2 763 %5 = fsub float -0.000000e+00, %3 764 %6 = call float @llvm.fma.f32(float %1, float %4, float %5) 765 %7 = insertelement <4 x float> %a1, float %6, i64 0 766 ret <4 x float> %7 767 } 768 769 define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 770 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd: 771 ; CHECK-FMA: # %bb.0: 772 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2] 773 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 774 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 775 ; 776 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd: 777 ; 
CHECK-AVX512VL: # %bb.0: 778 ; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2] 779 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 780 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 781 ; 782 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd: 783 ; CHECK-FMA-WIN: # %bb.0: 784 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] 785 ; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] 786 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero 787 ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x02] 788 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 789 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 790 %1 = extractelement <2 x double> %a0, i64 0 791 %2 = extractelement <2 x double> %a1, i64 0 792 %3 = extractelement <2 x double> %a2, i64 0 793 %4 = fsub double -0.000000e+00, %2 794 %5 = fsub double -0.000000e+00, %3 795 %6 = call double @llvm.fma.f64(double %1, double %4, double %5) 796 %7 = insertelement <2 x double> %a0, double %6, i64 0 797 ret <2 x double> %7 798 } 799 800 define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 801 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_sd: 802 ; CHECK-FMA: # %bb.0: 803 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xaf,0xca] 804 ; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 805 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1] 806 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 807 ; 808 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd: 809 ; CHECK-AVX512VL: # %bb.0: 810 ; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0xca] 811 ; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 812 ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 813 ; 
CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 814 ; 815 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_sd: 816 ; CHECK-FMA-WIN: # %bb.0: 817 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 818 ; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] 819 ; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero 820 ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x01] 821 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 822 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 823 %1 = extractelement <2 x double> %a1, i64 0 824 %2 = extractelement <2 x double> %a0, i64 0 825 %3 = extractelement <2 x double> %a2, i64 0 826 %4 = fsub double -0.000000e+00, %2 827 %5 = fsub double -0.000000e+00, %3 828 %6 = call double @llvm.fma.f64(double %1, double %4, double %5) 829 %7 = insertelement <2 x double> %a1, double %6, i64 0 830 ret <2 x double> %7 831 } 832 833 define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 834 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps: 835 ; CHECK-FMA: # %bb.0: 836 ; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xae,0xc2] 837 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 838 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 839 ; 840 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps: 841 ; CHECK-AVX512VL: # %bb.0: 842 ; CHECK-AVX512VL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2] 843 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 844 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 845 ; 846 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps: 847 ; CHECK-FMA-WIN: # %bb.0: 848 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] 849 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 850 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00] 851 ; 
CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem 852 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 853 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 854 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 855 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2) 856 ret <4 x float> %3 857 } 858 859 define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 860 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd: 861 ; CHECK-FMA: # %bb.0: 862 ; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xae,0xc2] 863 ; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 864 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 865 ; 866 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd: 867 ; CHECK-AVX512VL: # %bb.0: 868 ; CHECK-AVX512VL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2] 869 ; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 870 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 871 ; 872 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd: 873 ; CHECK-FMA-WIN: # %bb.0: 874 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] 875 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 876 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00] 877 ; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem 878 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 879 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0 880 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 881 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2) 882 ret <2 x double> %3 883 } 884 885 define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 
886 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256: 887 ; CHECK-FMA: # %bb.0: 888 ; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xae,0xc2] 889 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 890 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 891 ; 892 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps_256: 893 ; CHECK-AVX512VL: # %bb.0: 894 ; CHECK-AVX512VL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2] 895 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 896 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 897 ; 898 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps_256: 899 ; CHECK-FMA-WIN: # %bb.0: 900 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 901 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 902 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00] 903 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem 904 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 905 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 906 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 907 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2) 908 ret <8 x float> %3 909 } 910 911 define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 912 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256: 913 ; CHECK-FMA: # %bb.0: 914 ; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xae,0xc2] 915 ; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 916 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 917 ; 918 ; CHECK-AVX512VL-LABEL: 
test_x86_fma_vfnmsub_pd_256: 919 ; CHECK-AVX512VL: # %bb.0: 920 ; CHECK-AVX512VL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2] 921 ; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 922 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 923 ; 924 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd_256: 925 ; CHECK-FMA-WIN: # %bb.0: 926 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 927 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 928 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00] 929 ; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem 930 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 931 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0 932 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 933 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2) 934 ret <4 x double> %3 935 } 936 937 ; VFMADDSUB 938 define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 939 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps: 940 ; CHECK-FMA: # %bb.0: 941 ; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa6,0xc2] 942 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 943 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 944 ; 945 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps: 946 ; CHECK-AVX512VL: # %bb.0: 947 ; CHECK-AVX512VL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2] 948 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 949 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 950 ; 951 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps: 952 ; CHECK-FMA-WIN: # %bb.0: 953 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: 
[0xc5,0xf8,0x28,0x09] 954 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 955 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00] 956 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem 957 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 958 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) 959 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 960 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2) 961 %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 962 ret <4 x float> %4 963 } 964 965 define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 966 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd: 967 ; CHECK-FMA: # %bb.0: 968 ; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa6,0xc2] 969 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 970 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 971 ; 972 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd: 973 ; CHECK-AVX512VL: # %bb.0: 974 ; CHECK-AVX512VL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2] 975 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 976 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 977 ; 978 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd: 979 ; CHECK-FMA-WIN: # %bb.0: 980 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] 981 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 982 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00] 983 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem 984 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 985 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x 
double> %a2) 986 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 987 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2) 988 %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3> 989 ret <2 x double> %4 990 } 991 992 define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 993 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256: 994 ; CHECK-FMA: # %bb.0: 995 ; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa6,0xc2] 996 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 997 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 998 ; 999 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps_256: 1000 ; CHECK-AVX512VL: # %bb.0: 1001 ; CHECK-AVX512VL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2] 1002 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 1003 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1004 ; 1005 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps_256: 1006 ; CHECK-FMA-WIN: # %bb.0: 1007 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 1008 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 1009 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00] 1010 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem 1011 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1012 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) 1013 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 1014 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2) 1015 %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 
6, i32 15> 1016 ret <8 x float> %4 1017 } 1018 1019 define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1020 ; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256: 1021 ; CHECK-FMA: # %bb.0: 1022 ; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa6,0xc2] 1023 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 1024 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1025 ; 1026 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd_256: 1027 ; CHECK-AVX512VL: # %bb.0: 1028 ; CHECK-AVX512VL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2] 1029 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 1030 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1031 ; 1032 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd_256: 1033 ; CHECK-FMA-WIN: # %bb.0: 1034 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 1035 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 1036 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00] 1037 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem 1038 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1039 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) 1040 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 1041 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2) 1042 %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1043 ret <4 x double> %4 1044 } 1045 1046 ; VFMSUBADD 1047 define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1048 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps: 1049 ; CHECK-FMA: # %bb.0: 1050 ; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # encoding: 
[0xc4,0xe2,0x71,0xa7,0xc2] 1051 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 1052 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1053 ; 1054 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps: 1055 ; CHECK-AVX512VL: # %bb.0: 1056 ; CHECK-AVX512VL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa7,0xc2] 1057 ; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 1058 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1059 ; 1060 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps: 1061 ; CHECK-FMA-WIN: # %bb.0: 1062 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] 1063 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 1064 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00] 1065 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem 1066 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1067 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) 1068 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 1069 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2) 1070 %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1071 ret <4 x float> %4 1072 } 1073 1074 define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 1075 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd: 1076 ; CHECK-FMA: # %bb.0: 1077 ; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa7,0xc2] 1078 ; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 1079 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1080 ; 1081 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd: 1082 ; CHECK-AVX512VL: # %bb.0: 1083 ; CHECK-AVX512VL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa7,0xc2] 1084 ; 
CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 1085 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1086 ; 1087 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd: 1088 ; CHECK-FMA-WIN: # %bb.0: 1089 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] 1090 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 1091 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00] 1092 ; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem 1093 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1094 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) 1095 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 1096 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2) 1097 %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3> 1098 ret <2 x double> %4 1099 } 1100 1101 define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 1102 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256: 1103 ; CHECK-FMA: # %bb.0: 1104 ; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa7,0xc2] 1105 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1106 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1107 ; 1108 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps_256: 1109 ; CHECK-AVX512VL: # %bb.0: 1110 ; CHECK-AVX512VL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa7,0xc2] 1111 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1112 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1113 ; 1114 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps_256: 1115 ; CHECK-FMA-WIN: # %bb.0: 1116 ; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 1117 ; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 1118 ; CHECK-FMA-WIN-NEXT: 
vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00] 1119 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem 1120 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1121 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) 1122 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 1123 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2) 1124 %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> 1125 ret <8 x float> %4 1126 } 1127 1128 define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1129 ; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256: 1130 ; CHECK-FMA: # %bb.0: 1131 ; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa7,0xc2] 1132 ; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1133 ; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1134 ; 1135 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd_256: 1136 ; CHECK-AVX512VL: # %bb.0: 1137 ; CHECK-AVX512VL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa7,0xc2] 1138 ; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1139 ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1140 ; 1141 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd_256: 1142 ; CHECK-FMA-WIN: # %bb.0: 1143 ; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 1144 ; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 1145 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00] 1146 ; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem 1147 ; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1148 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> 
%a0, <4 x double> %a1, <4 x double> %a2) 1149 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 1150 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2) 1151 %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1152 ret <4 x double> %4 1153 } 1154 1155 declare float @llvm.fma.f32(float, float, float) 1156 declare double @llvm.fma.f64(double, double, double) 1157 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) 1158 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) 1159 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) 1160 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) 1161 1162 attributes #0 = { nounwind } 1163