; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=KNL

; This test checks combinations of FNEG and FMA intrinsics on AVX-512 target
; PR28892
;
; Conventions used by the tests in this file:
;  * The FNEG under test is written as an fsub of the value from a vector of
;    -0.0 constants.
;  * The trailing i32 operand of the AVX-512 intrinsics selects the rounding
;    annotation seen in the expected assembly: 1 prints {rd-sae}, 2 prints
;    {ru-sae}, 8 prints {rn-sae}, and 4 prints no annotation.
;  * A mask operand of -1 has every bit set; those tests expect unmasked
;    instructions.

; Negated addend %c fed to vfmadd.ps.512: a single vfmsub213ps is expected.
define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 4) #2
  ret <16 x float> %0
}

; 512-bit single-precision FMA intrinsics used by the tests in this file.
declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)


; Negated result of vfmadd.ps.512: a single vfnmsub213ps is expected.
define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 4) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}

; Negated result of mask.vfnmadd.ps.512 with an all-ones mask: a single
; vfmsub213ps is expected.
define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}

; Negated result of mask.vfnmsub.ps.512 with an all-ones mask: a single
; vfmadd213ps is expected.
define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}

; Same shape as test1 but with rounding operand 2: the expected vfmsub213ps
; carries the {ru-sae} annotation.
define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 2) #2
  ret <16 x float> %0
}

; Same shape as test4 but with rounding operand 2: the expected vfmadd213ps
; carries the {ru-sae} annotation.
define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 2) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}


; 256-bit case: negated result of fma.vfmsub.ps.256; a single vfnmadd213ps is
; expected.
define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT: retq
entry:
  %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <8 x float> %sub.i
}

; 256-bit case: negated %c passed as the third operand of fma.vfmsub.ps.256; a
; single vfmadd213ps is expected.
define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT: retq
entry:
  %sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
  ret <8 x float> %0
}

declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)


; Double-precision counterpart of test2: negated result of vfmadd.pd.512; a
; single vfnmsub213pd is expected.
define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %0
  ret <8 x double> %sub.i
}

declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32)

; Negated result of the scalar mask.vfmadd.sd intrinsic (mask -1). The expected
; assembly keeps vfmadd213sd and applies the negation afterwards with a
; separate vxorpd that takes a constant-pool operand.
define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1, i32 4) #2
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %0
  ret <2 x double> %sub.i
}

declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8, i32)

; Negated %c passed as the third operand of the scalar mask3.vfmadd.ss
; intrinsic with a variable mask. The expected assembly still materialises the
; negated %c with an xor, computes vfmsub213ss unmasked, and merges the low
; element into the negated %c with a masked vmovss.
define <4 x float> @test11(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test11:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vxorps {{.*}}(%rip){1to4}, %xmm2, %xmm3
; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vmovss %xmm0, %xmm3, %xmm3 {%k1}
; SKX-NEXT: vmovaps %xmm3, %xmm0
; SKX-NEXT: retq
;
; KNL-LABEL: test11:
; KNL: # %bb.0: # %entry
; KNL-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
; KNL-NEXT: vxorps %xmm3, %xmm2, %xmm3
; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovss %xmm0, %xmm3, %xmm3 {%k1}
; KNL-NEXT: vmovaps %xmm3, %xmm0
; KNL-NEXT: retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

; Same IR shape as test11 but using the mask.vfmadd.ss intrinsic: a single
; masked vfmsub213ss is expected.
define <4 x float> @test11b(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test11b:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test11b:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

; vfmadd.pd.512 result selected against %a under a variable mask, then negated.
; The expected assembly is a masked vfmadd132pd followed by a separate xor
; with a broadcast constant-pool operand.
define <8 x double> @test12(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
; SKX-LABEL: test12:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
;
; KNL-LABEL: test12:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: vpxorq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; KNL-NEXT: retq
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %0, <8 x double> %a
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %sel
  ret <2 x double> %sub.i
}

; Negated %a passed as the first operand of mask.vfmadd.sd with a variable
; mask. The expected assembly keeps an xor of %a, computes vfnmadd213sd
; unmasked, and merges the low element into the negated %a with a masked
; vmovsd.
define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; SKX-LABEL: test13:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm3
; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vmovsd %xmm1, %xmm3, %xmm3 {%k1}
; SKX-NEXT: vmovapd %xmm3, %xmm0
; SKX-NEXT: retq
;
; KNL-LABEL: test13:
; KNL: # %bb.0: # %entry
; KNL-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm3
; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vmovsd %xmm1, %xmm3, %xmm3 {%k1}
; KNL-NEXT: vmovapd %xmm3, %xmm0
; KNL-NEXT: retq

entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
  ret <2 x double> %0
}

; Negated result of mask.vfnmsub.ps.512 with a variable mask and rounding
; operand 2. The expected assembly is a masked vfnmsub132ps {ru-sae} followed
; by a separate xor with a broadcast constant-pool operand.
define <16 x float> @test14(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; SKX-LABEL: test14:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq
;
; KNL-LABEL: test14:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT: retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 2) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}

; Two chained vfmadd.ps.512 calls (rounding operands 2, then 1). The negated %a
; is the first multiplicand of the first call, the fallback value of the first
; select, and the second multiplicand of the second call. The expected assembly
; keeps one xor for the negated %a and uses vfnmadd213ps {ru-sae} followed by a
; masked vfnmadd132ps {rd-sae}.
define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; SKX-LABEL: test15:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm3
; SKX-NEXT: vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
; SKX-NEXT: vmovaps %zmm1, %zmm3 {%k1}
; SKX-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
; SKX-NEXT: vmovaps %zmm3, %zmm0
; SKX-NEXT: retq
;
; KNL-LABEL: test15:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm3
; KNL-NEXT: vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
; KNL-NEXT: vmovaps %zmm1, %zmm3 {%k1}
; KNL-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
; KNL-NEXT: vmovaps %zmm3, %zmm0
; KNL-NEXT: retq
entry:
  %bc = bitcast i16 %mask to <16 x i1>
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 2)
  %sel = select <16 x i1> %bc, <16 x float> %0, <16 x float> %sub.i
  %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 1)
  %sel2 = select <16 x i1> %bc, <16 x float> %1, <16 x float> %sel
  ret <16 x float> %sel2
}

; Negated %c passed as the third operand of vfmaddsub.ps.512 (rounding operand
; 1), with the result selected against %a under a variable mask: a single
; masked vfmsubadd132ps {rd-sae} is expected.
define <16 x float> @test16(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; SKX-LABEL: test16:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test16:
; KNL: # %bb.0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: retq
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 1)
  %bc = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
  ret <16 x float> %sel
}
declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)

; Double-precision counterpart of test16 with rounding operand 4: a single
; masked vfmsubadd132pd without a rounding annotation is expected.
define <8 x double> @test17(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
; SKX-LABEL: test17:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test17:
; KNL: # %bb.0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: retq
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %sub.i, i32 4)
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a
  ret <8 x double> %sel
}
declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32)

; Negated %b passed as the second operand of mask.vfmadd.ss: a single masked
; vfnmadd213ss is expected.
define <4 x float> @test18(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test18:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test18:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; Negated %b and negated %c passed to mask.vfmadd.ss: a single masked
; vfnmsub213ss is expected.
define <4 x float> @test19(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test19:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test19:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; Negated %b passed as the second operand of mask3.vfmadd.ss: a masked
; vfnmadd231ss accumulating into the %c register is expected, followed by a
; move of that register into the return register.
define <4 x float> @test20(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test20:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT: vmovaps %xmm2, %xmm0
; SKX-NEXT: retq
;
; KNL-LABEL: test20:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT: vmovaps %xmm2, %xmm0
; KNL-NEXT: retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; Same IR shape as test18 but with rounding operand 8: the expected masked
; vfnmadd213ss carries the {rn-sae} annotation.
define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test21:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test21:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; Same IR shape as test19 but with rounding operand 8: the expected masked
; vfnmsub213ss carries the {rn-sae} annotation.
define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test22:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test22:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; Same IR shape as test20 but with rounding operand 8: the expected masked
; vfnmadd231ss carries the {rn-sae} annotation.
define <4 x float> @test23(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test23:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT: vmovaps %xmm2, %xmm0
; SKX-NEXT: retq
;
; KNL-LABEL: test23:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT: vmovaps %xmm2, %xmm0
; KNL-NEXT: retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; Same IR shape as test11b but with rounding operand 8: the expected masked
; vfmsub213ss carries the {rn-sae} annotation.
define <4 x float> @test24(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test24:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test24:
; KNL: # %bb.0: # %entry
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT: retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; Negated %b and negated %c passed to the unmasked vfmadd.ps.512 with rounding
; operand 8: a single vfnmsub213ps {rn-sae} is expected.
define <16 x float> @test25(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test25:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %sub.i.2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %sub.i, <16 x float> %sub.i.2, i32 8) #2
  ret <16 x float> %0
}