; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma -show-mc-encoding | FileCheck %s

; Scalar multiply/add patterns guarded by a mask bit, expected to be selected
; as a single masked scalar FMA-family instruction.
;
; Every function below follows the same shape:
;   * element 0 is extracted from each vector operand (the 32-bit variants
;     first bitcast <2 x double> to <4 x float>),
;   * a 'fast' fmul feeds a 'fast' fadd/fsub,
;   * bit 0 of %k (i8 bitcast to <8 x i1>, element 0) selects between the
;     arithmetic result and a passthrough value,
;   * the selected scalar is inserted back into element 0 of a vector.
;
; Three masking flavours are exercised for each operation:
;   mask  - passthrough is element 0 of %a; result is inserted into %a.
;   maskz - passthrough is 0.0;             result is inserted into %a.
;   mask3 - passthrough is element 0 of %c; result is inserted into %c
;           (the expected output additionally moves xmm2 to xmm0).

;
; fmadd: (b * a) + c
;

define <2 x double> @combine_scalar_mask_fmadd_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fmadd_f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fadd fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %3
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask_fmadd_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fmadd_f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fadd fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %0
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_maskz_fmadd_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fmadd_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fadd fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float 0.000000e+00
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_maskz_fmadd_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fmadd_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fadd fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double 0.000000e+00
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_mask3_fmadd_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fmadd_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xb9,0xd0]
; CHECK-NEXT:    # xmm2 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fadd fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %5
  %11 = insertelement <4 x float> %2, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask3_fmadd_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fmadd_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb9,0xd0]
; CHECK-NEXT:    # xmm2 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fadd fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %2
  %8 = insertelement <2 x double> %c, double %7, i64 0
  ret <2 x double> %8
}

;
; fmsub: (b * a) - c
;

define <2 x double> @combine_scalar_mask_fmsub_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fmsub_f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %3
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask_fmsub_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fmsub_f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %0
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_maskz_fmsub_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fmsub_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float 0.000000e+00
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_maskz_fmsub_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fmsub_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double 0.000000e+00
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_mask3_fmsub_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fmsub_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbb,0xd0]
; CHECK-NEXT:    # xmm2 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %5
  %11 = insertelement <4 x float> %2, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask3_fmsub_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fmsub_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbb,0xd0]
; CHECK-NEXT:    # xmm2 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %2
  %8 = insertelement <2 x double> %c, double %7, i64 0
  ret <2 x double> %8
}

;
; fnmadd: c - (b * a)
;

define <2 x double> @combine_scalar_mask_fnmadd_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fnmadd_f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %5, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %3
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask_fnmadd_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fnmadd_f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %2, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %0
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_maskz_fnmadd_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fnmadd_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %5, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float 0.000000e+00
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_maskz_fnmadd_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fnmadd_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %2, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double 0.000000e+00
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_mask3_fnmadd_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fnmadd_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbd,0xd0]
; CHECK-NEXT:    # xmm2 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %5, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %5
  %11 = insertelement <4 x float> %2, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask3_fnmadd_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fnmadd_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbd,0xd0]
; CHECK-NEXT:    # xmm2 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %2, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %2
  %8 = insertelement <2 x double> %c, double %7, i64 0
  ret <2 x double> %8
}

;
; fnmsub: (-c) - (b * a)
; The negation of c is written as 'fsub fast -0.0, c'.
;

define <2 x double> @combine_scalar_mask_fnmsub_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fnmsub_f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %sub = fsub fast float -0.000000e+00, %5
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %sub, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %3
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask_fnmsub_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fnmsub_f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %sub = fsub fast double -0.000000e+00, %2
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %sub, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %0
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_maskz_fnmsub_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fnmsub_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %sub = fsub fast float -0.000000e+00, %5
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %sub, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float 0.000000e+00
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_maskz_fnmsub_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fnmsub_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %sub = fsub fast double -0.000000e+00, %2
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %sub, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double 0.000000e+00
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_mask3_fnmsub_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fnmsub_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbf,0xd0]
; CHECK-NEXT:    # xmm2 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %sub = fsub fast float -0.000000e+00, %5
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %sub, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %5
  %11 = insertelement <4 x float> %2, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask3_fnmsub_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fnmsub_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbf,0xd0]
; CHECK-NEXT:    # xmm2 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %sub = fsub fast double -0.000000e+00, %2
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %sub, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %2
  %8 = insertelement <2 x double> %c, double %7, i64 0
  ret <2 x double> %8
}