Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
      3 
      4 define <2 x double> @combine_scalar_mask_fmadd_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
        ; Merge-masked scalar f32 multiply-add spelled out in plain IR. The
        ; <2 x double> operands are bitcast to <4 x float>; lane 0 of the result is
        ; (b0 * a0) + c0 when element 0 of %k (viewed as <8 x i1>) is set, otherwise
        ; a0, and the other lanes are taken from %a. The assertions below expect a
        ; single vfmadd213ss predicated on %k1.
      5 ; CHECK-LABEL: combine_scalar_mask_fmadd_f32:
      6 ; CHECK:       # %bb.0: # %entry
      7 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
      8 ; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
      9 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
     10 ; CHECK-NEXT:    retq # encoding: [0xc3]
     11 entry:
     12   %0 = bitcast <2 x double> %a to <4 x float>
     13   %1 = bitcast <2 x double> %b to <4 x float>
     14   %2 = bitcast <2 x double> %c to <4 x float>
     15   %3 = extractelement <4 x float> %0, i64 0
     16   %4 = extractelement <4 x float> %1, i64 0
     17   %5 = extractelement <4 x float> %2, i64 0
     18   %6 = fmul fast float %4, %3
     19   %7 = fadd fast float %6, %5
     20   %8 = bitcast i8 %k to <8 x i1>
     21   %9 = extractelement <8 x i1> %8, i64 0
     22   %10 = select i1 %9, float %7, float %3 ; mask element clear: keep a0
     23   %11 = insertelement <4 x float> %0, float %10, i64 0 ; other lanes come from %a
     24   %12 = bitcast <4 x float> %11 to <2 x double>
     25   ret <2 x double> %12
     26 }
     27 
     28 define <2 x double> @combine_scalar_mask_fmadd_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
        ; Merge-masked scalar f64 multiply-add spelled out in plain IR. Element 0 of
        ; the result is (b0 * a0) + c0 when element 0 of %k (viewed as <8 x i1>) is
        ; set, otherwise a0; element 1 is taken from %a. The assertions below expect
        ; a single vfmadd213sd predicated on %k1.
     29 ; CHECK-LABEL: combine_scalar_mask_fmadd_f64:
     30 ; CHECK:       # %bb.0: # %entry
     31 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
     32 ; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
     33 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
     34 ; CHECK-NEXT:    retq # encoding: [0xc3]
     35 entry:
     36   %0 = extractelement <2 x double> %a, i64 0
     37   %1 = extractelement <2 x double> %b, i64 0
     38   %2 = extractelement <2 x double> %c, i64 0
     39   %3 = fmul fast double %1, %0
     40   %4 = fadd fast double %3, %2
     41   %5 = bitcast i8 %k to <8 x i1>
     42   %6 = extractelement <8 x i1> %5, i64 0
     43   %7 = select i1 %6, double %4, double %0 ; mask element clear: keep a0
     44   %8 = insertelement <2 x double> %a, double %7, i64 0 ; element 1 comes from %a
     45   ret <2 x double> %8
     46 }
     47 
     48 define <2 x double> @combine_scalar_maskz_fmadd_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
        ; Zero-masked scalar f32 multiply-add spelled out in plain IR. The
        ; <2 x double> operands are bitcast to <4 x float>; lane 0 of the result is
        ; (b0 * a0) + c0 when element 0 of %k (viewed as <8 x i1>) is set, otherwise
        ; 0.0, and the other lanes are taken from %a. The assertions below expect a
        ; single vfmadd213ss with %k1 and the {z} modifier.
     49 ; CHECK-LABEL: combine_scalar_maskz_fmadd_32:
     50 ; CHECK:       # %bb.0: # %entry
     51 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
     52 ; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
     53 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
     54 ; CHECK-NEXT:    retq # encoding: [0xc3]
     55 entry:
     56   %0 = bitcast <2 x double> %a to <4 x float>
     57   %1 = bitcast <2 x double> %b to <4 x float>
     58   %2 = bitcast <2 x double> %c to <4 x float>
     59   %3 = extractelement <4 x float> %0, i64 0
     60   %4 = extractelement <4 x float> %1, i64 0
     61   %5 = extractelement <4 x float> %2, i64 0
     62   %6 = fmul fast float %4, %3
     63   %7 = fadd fast float %6, %5
     64   %8 = bitcast i8 %k to <8 x i1>
     65   %9 = extractelement <8 x i1> %8, i64 0
     66   %10 = select i1 %9, float %7, float 0.000000e+00 ; mask element clear: lane 0 becomes 0.0
     67   %11 = insertelement <4 x float> %0, float %10, i64 0 ; other lanes come from %a
     68   %12 = bitcast <4 x float> %11 to <2 x double>
     69   ret <2 x double> %12
     70 }
     71 
     72 define <2 x double> @combine_scalar_maskz_fmadd_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
        ; Zero-masked scalar f64 multiply-add spelled out in plain IR. Element 0 of
        ; the result is (b0 * a0) + c0 when element 0 of %k (viewed as <8 x i1>) is
        ; set, otherwise 0.0; element 1 is taken from %a. The assertions below
        ; expect a single vfmadd213sd with %k1 and the {z} modifier.
     73 ; CHECK-LABEL: combine_scalar_maskz_fmadd_64:
     74 ; CHECK:       # %bb.0: # %entry
     75 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
     76 ; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
     77 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
     78 ; CHECK-NEXT:    retq # encoding: [0xc3]
     79 entry:
     80   %0 = extractelement <2 x double> %a, i64 0
     81   %1 = extractelement <2 x double> %b, i64 0
     82   %2 = extractelement <2 x double> %c, i64 0
     83   %3 = fmul fast double %1, %0
     84   %4 = fadd fast double %3, %2
     85   %5 = bitcast i8 %k to <8 x i1>
     86   %6 = extractelement <8 x i1> %5, i64 0
     87   %7 = select i1 %6, double %4, double 0.000000e+00 ; mask element clear: element 0 becomes 0.0
     88   %8 = insertelement <2 x double> %a, double %7, i64 0 ; element 1 comes from %a
     89   ret <2 x double> %8
     90 }
     91 
     92 define <2 x double> @combine_scalar_mask3_fmadd_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
        ; Scalar f32 multiply-add whose passthrough is the addend. The <2 x double>
        ; operands are bitcast to <4 x float>; lane 0 of the result is
        ; (b0 * a0) + c0 when element 0 of %k (viewed as <8 x i1>) is set, otherwise
        ; c0, and the other lanes are taken from %c. The assertions below expect a
        ; vfmadd231ss predicated on %k1 that writes xmm2, followed by a vmovaps of
        ; xmm2 into xmm0.
     93 ; CHECK-LABEL: combine_scalar_mask3_fmadd_32:
     94 ; CHECK:       # %bb.0: # %entry
     95 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
     96 ; CHECK-NEXT:    vfmadd231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xb9,0xd0]
     97 ; CHECK-NEXT:    # xmm2 = (xmm1 * xmm0) + xmm2
     98 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
     99 ; CHECK-NEXT:    retq # encoding: [0xc3]
    100 entry:
    101   %0 = bitcast <2 x double> %a to <4 x float>
    102   %1 = bitcast <2 x double> %b to <4 x float>
    103   %2 = bitcast <2 x double> %c to <4 x float>
    104   %3 = extractelement <4 x float> %0, i64 0
    105   %4 = extractelement <4 x float> %1, i64 0
    106   %5 = extractelement <4 x float> %2, i64 0
    107   %6 = fmul fast float %4, %3
    108   %7 = fadd fast float %6, %5
    109   %8 = bitcast i8 %k to <8 x i1>
    110   %9 = extractelement <8 x i1> %8, i64 0
    111   %10 = select i1 %9, float %7, float %5 ; mask element clear: keep c0
    112   %11 = insertelement <4 x float> %2, float %10, i64 0 ; other lanes come from %c
    113   %12 = bitcast <4 x float> %11 to <2 x double>
    114   ret <2 x double> %12
    115 }
    116 
    117 define <2 x double> @combine_scalar_mask3_fmadd_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
        ; Scalar f64 multiply-add whose passthrough is the addend. Element 0 of the
        ; result is (b0 * a0) + c0 when element 0 of %k (viewed as <8 x i1>) is set,
        ; otherwise c0; element 1 is taken from %c. The assertions below expect a
        ; vfmadd231sd predicated on %k1 that writes xmm2, followed by a vmovapd of
        ; xmm2 into xmm0.
    118 ; CHECK-LABEL: combine_scalar_mask3_fmadd_64:
    119 ; CHECK:       # %bb.0: # %entry
    120 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    121 ; CHECK-NEXT:    vfmadd231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb9,0xd0]
    122 ; CHECK-NEXT:    # xmm2 = (xmm1 * xmm0) + xmm2
    123 ; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
    124 ; CHECK-NEXT:    retq # encoding: [0xc3]
    125 entry:
    126   %0 = extractelement <2 x double> %a, i64 0
    127   %1 = extractelement <2 x double> %b, i64 0
    128   %2 = extractelement <2 x double> %c, i64 0
    129   %3 = fmul fast double %1, %0
    130   %4 = fadd fast double %3, %2
    131   %5 = bitcast i8 %k to <8 x i1>
    132   %6 = extractelement <8 x i1> %5, i64 0
    133   %7 = select i1 %6, double %4, double %2 ; mask element clear: keep c0
    134   %8 = insertelement <2 x double> %c, double %7, i64 0 ; element 1 comes from %c
    135   ret <2 x double> %8
    136 }
    137 
    138 define <2 x double> @combine_scalar_mask_fmsub_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
        ; Merge-masked scalar f32 multiply-subtract spelled out in plain IR. The
        ; <2 x double> operands are bitcast to <4 x float>; lane 0 of the result is
        ; (b0 * a0) - c0 when element 0 of %k (viewed as <8 x i1>) is set, otherwise
        ; a0, and the other lanes are taken from %a. The assertions below expect a
        ; single vfmsub213ss predicated on %k1.
    139 ; CHECK-LABEL: combine_scalar_mask_fmsub_f32:
    140 ; CHECK:       # %bb.0: # %entry
    141 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    142 ; CHECK-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
    143 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
    144 ; CHECK-NEXT:    retq # encoding: [0xc3]
    145 entry:
    146   %0 = bitcast <2 x double> %a to <4 x float>
    147   %1 = bitcast <2 x double> %b to <4 x float>
    148   %2 = bitcast <2 x double> %c to <4 x float>
    149   %3 = extractelement <4 x float> %0, i64 0
    150   %4 = extractelement <4 x float> %1, i64 0
    151   %5 = extractelement <4 x float> %2, i64 0
    152   %6 = fmul fast float %4, %3
    153   %7 = fsub fast float %6, %5
    154   %8 = bitcast i8 %k to <8 x i1>
    155   %9 = extractelement <8 x i1> %8, i64 0
    156   %10 = select i1 %9, float %7, float %3 ; mask element clear: keep a0
    157   %11 = insertelement <4 x float> %0, float %10, i64 0 ; other lanes come from %a
    158   %12 = bitcast <4 x float> %11 to <2 x double>
    159   ret <2 x double> %12
    160 }
    161 
    162 define <2 x double> @combine_scalar_mask_fmsub_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
        ; Merge-masked scalar f64 multiply-subtract spelled out in plain IR. Element
        ; 0 of the result is (b0 * a0) - c0 when element 0 of %k (viewed as
        ; <8 x i1>) is set, otherwise a0; element 1 is taken from %a. The assertions
        ; below expect a single vfmsub213sd predicated on %k1.
    163 ; CHECK-LABEL: combine_scalar_mask_fmsub_f64:
    164 ; CHECK:       # %bb.0: # %entry
    165 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    166 ; CHECK-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
    167 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
    168 ; CHECK-NEXT:    retq # encoding: [0xc3]
    169 entry:
    170   %0 = extractelement <2 x double> %a, i64 0
    171   %1 = extractelement <2 x double> %b, i64 0
    172   %2 = extractelement <2 x double> %c, i64 0
    173   %3 = fmul fast double %1, %0
    174   %4 = fsub fast double %3, %2
    175   %5 = bitcast i8 %k to <8 x i1>
    176   %6 = extractelement <8 x i1> %5, i64 0
    177   %7 = select i1 %6, double %4, double %0 ; mask element clear: keep a0
    178   %8 = insertelement <2 x double> %a, double %7, i64 0 ; element 1 comes from %a
    179   ret <2 x double> %8
    180 }
    181 
    182 define <2 x double> @combine_scalar_maskz_fmsub_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
        ; Zero-masked scalar f32 multiply-subtract spelled out in plain IR. The
        ; <2 x double> operands are bitcast to <4 x float>; lane 0 of the result is
        ; (b0 * a0) - c0 when element 0 of %k (viewed as <8 x i1>) is set, otherwise
        ; 0.0, and the other lanes are taken from %a. The assertions below expect a
        ; single vfmsub213ss with %k1 and the {z} modifier.
    183 ; CHECK-LABEL: combine_scalar_maskz_fmsub_32:
    184 ; CHECK:       # %bb.0: # %entry
    185 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    186 ; CHECK-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
    187 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
    188 ; CHECK-NEXT:    retq # encoding: [0xc3]
    189 entry:
    190   %0 = bitcast <2 x double> %a to <4 x float>
    191   %1 = bitcast <2 x double> %b to <4 x float>
    192   %2 = bitcast <2 x double> %c to <4 x float>
    193   %3 = extractelement <4 x float> %0, i64 0
    194   %4 = extractelement <4 x float> %1, i64 0
    195   %5 = extractelement <4 x float> %2, i64 0
    196   %6 = fmul fast float %4, %3
    197   %7 = fsub fast float %6, %5
    198   %8 = bitcast i8 %k to <8 x i1>
    199   %9 = extractelement <8 x i1> %8, i64 0
    200   %10 = select i1 %9, float %7, float 0.000000e+00 ; mask element clear: lane 0 becomes 0.0
    201   %11 = insertelement <4 x float> %0, float %10, i64 0 ; other lanes come from %a
    202   %12 = bitcast <4 x float> %11 to <2 x double>
    203   ret <2 x double> %12
    204 }
    205 
    206 define <2 x double> @combine_scalar_maskz_fmsub_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
        ; Zero-masked scalar f64 multiply-subtract spelled out in plain IR. Element 0
        ; of the result is (b0 * a0) - c0 when element 0 of %k (viewed as <8 x i1>)
        ; is set, otherwise 0.0; element 1 is taken from %a. The assertions below
        ; expect a single vfmsub213sd with %k1 and the {z} modifier.
    207 ; CHECK-LABEL: combine_scalar_maskz_fmsub_64:
    208 ; CHECK:       # %bb.0: # %entry
    209 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    210 ; CHECK-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
    211 ; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
    212 ; CHECK-NEXT:    retq # encoding: [0xc3]
    213 entry:
    214   %0 = extractelement <2 x double> %a, i64 0
    215   %1 = extractelement <2 x double> %b, i64 0
    216   %2 = extractelement <2 x double> %c, i64 0
    217   %3 = fmul fast double %1, %0
    218   %4 = fsub fast double %3, %2
    219   %5 = bitcast i8 %k to <8 x i1>
    220   %6 = extractelement <8 x i1> %5, i64 0
    221   %7 = select i1 %6, double %4, double 0.000000e+00 ; mask element clear: element 0 becomes 0.0
    222   %8 = insertelement <2 x double> %a, double %7, i64 0 ; element 1 comes from %a
    223   ret <2 x double> %8
    224 }
    225 
    226 define <2 x double> @combine_scalar_mask3_fmsub_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
        ; Scalar f32 multiply-subtract whose passthrough is the subtrahend. The
        ; <2 x double> operands are bitcast to <4 x float>; lane 0 of the result is
        ; (b0 * a0) - c0 when element 0 of %k (viewed as <8 x i1>) is set, otherwise
        ; c0, and the other lanes are taken from %c. The assertions below expect a
        ; vfmsub231ss predicated on %k1 that writes xmm2, followed by a vmovaps of
        ; xmm2 into xmm0.
    227 ; CHECK-LABEL: combine_scalar_mask3_fmsub_32:
    228 ; CHECK:       # %bb.0: # %entry
    229 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    230 ; CHECK-NEXT:    vfmsub231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbb,0xd0]
    231 ; CHECK-NEXT:    # xmm2 = (xmm1 * xmm0) - xmm2
    232 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
    233 ; CHECK-NEXT:    retq # encoding: [0xc3]
    234 entry:
    235   %0 = bitcast <2 x double> %a to <4 x float>
    236   %1 = bitcast <2 x double> %b to <4 x float>
    237   %2 = bitcast <2 x double> %c to <4 x float>
    238   %3 = extractelement <4 x float> %0, i64 0
    239   %4 = extractelement <4 x float> %1, i64 0
    240   %5 = extractelement <4 x float> %2, i64 0
    241   %6 = fmul fast float %4, %3
    242   %7 = fsub fast float %6, %5
    243   %8 = bitcast i8 %k to <8 x i1>
    244   %9 = extractelement <8 x i1> %8, i64 0
    245   %10 = select i1 %9, float %7, float %5 ; mask element clear: keep c0
    246   %11 = insertelement <4 x float> %2, float %10, i64 0 ; other lanes come from %c
    247   %12 = bitcast <4 x float> %11 to <2 x double>
    248   ret <2 x double> %12
    249 }
    250 
    251 define <2 x double> @combine_scalar_mask3_fmsub_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
        ; Scalar f64 multiply-subtract whose passthrough is the subtrahend. Element 0
        ; of the result is (b0 * a0) - c0 when element 0 of %k (viewed as <8 x i1>)
        ; is set, otherwise c0; element 1 is taken from %c. The assertions below
        ; expect a vfmsub231sd predicated on %k1 that writes xmm2, followed by a
        ; vmovapd of xmm2 into xmm0.
    252 ; CHECK-LABEL: combine_scalar_mask3_fmsub_64:
    253 ; CHECK:       # %bb.0: # %entry
    254 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    255 ; CHECK-NEXT:    vfmsub231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbb,0xd0]
    256 ; CHECK-NEXT:    # xmm2 = (xmm1 * xmm0) - xmm2
    257 ; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
    258 ; CHECK-NEXT:    retq # encoding: [0xc3]
    259 entry:
    260   %0 = extractelement <2 x double> %a, i64 0
    261   %1 = extractelement <2 x double> %b, i64 0
    262   %2 = extractelement <2 x double> %c, i64 0
    263   %3 = fmul fast double %1, %0
    264   %4 = fsub fast double %3, %2
    265   %5 = bitcast i8 %k to <8 x i1>
    266   %6 = extractelement <8 x i1> %5, i64 0
    267   %7 = select i1 %6, double %4, double %2 ; mask element clear: keep c0
    268   %8 = insertelement <2 x double> %c, double %7, i64 0 ; element 1 comes from %c
    269   ret <2 x double> %8
    270 }
    271 
    272 define <2 x double> @combine_scalar_mask_fnmadd_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
        ; Merge-masked scalar f32 negated multiply-add spelled out in plain IR. The
        ; <2 x double> operands are bitcast to <4 x float>; lane 0 of the result is
        ; c0 - (b0 * a0) when element 0 of %k (viewed as <8 x i1>) is set, otherwise
        ; a0, and the other lanes are taken from %a. The assertions below expect a
        ; single vfnmadd213ss predicated on %k1.
    273 ; CHECK-LABEL: combine_scalar_mask_fnmadd_f32:
    274 ; CHECK:       # %bb.0: # %entry
    275 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    276 ; CHECK-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
    277 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
    278 ; CHECK-NEXT:    retq # encoding: [0xc3]
    279 entry:
    280   %0 = bitcast <2 x double> %a to <4 x float>
    281   %1 = bitcast <2 x double> %b to <4 x float>
    282   %2 = bitcast <2 x double> %c to <4 x float>
    283   %3 = extractelement <4 x float> %0, i64 0
    284   %4 = extractelement <4 x float> %1, i64 0
    285   %5 = extractelement <4 x float> %2, i64 0
    286   %6 = fmul fast float %4, %3
    287   %7 = fsub fast float %5, %6
    288   %8 = bitcast i8 %k to <8 x i1>
    289   %9 = extractelement <8 x i1> %8, i64 0
    290   %10 = select i1 %9, float %7, float %3 ; mask element clear: keep a0
    291   %11 = insertelement <4 x float> %0, float %10, i64 0 ; other lanes come from %a
    292   %12 = bitcast <4 x float> %11 to <2 x double>
    293   ret <2 x double> %12
    294 }
    295 
    296 define <2 x double> @combine_scalar_mask_fnmadd_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
        ; Merge-masked scalar f64 negated multiply-add spelled out in plain IR.
        ; Element 0 of the result is c0 - (b0 * a0) when element 0 of %k (viewed as
        ; <8 x i1>) is set, otherwise a0; element 1 is taken from %a. The assertions
        ; below expect a single vfnmadd213sd predicated on %k1.
    297 ; CHECK-LABEL: combine_scalar_mask_fnmadd_f64:
    298 ; CHECK:       # %bb.0: # %entry
    299 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    300 ; CHECK-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
    301 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
    302 ; CHECK-NEXT:    retq # encoding: [0xc3]
    303 entry:
    304   %0 = extractelement <2 x double> %a, i64 0
    305   %1 = extractelement <2 x double> %b, i64 0
    306   %2 = extractelement <2 x double> %c, i64 0
    307   %3 = fmul fast double %1, %0
    308   %4 = fsub fast double %2, %3
    309   %5 = bitcast i8 %k to <8 x i1>
    310   %6 = extractelement <8 x i1> %5, i64 0
    311   %7 = select i1 %6, double %4, double %0 ; mask element clear: keep a0
    312   %8 = insertelement <2 x double> %a, double %7, i64 0 ; element 1 comes from %a
    313   ret <2 x double> %8
    314 }
    315 
    316 define <2 x double> @combine_scalar_maskz_fnmadd_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
        ; Zero-masked scalar f32 negated multiply-add spelled out in plain IR. The
        ; <2 x double> operands are bitcast to <4 x float>; lane 0 of the result is
        ; c0 - (b0 * a0) when element 0 of %k (viewed as <8 x i1>) is set, otherwise
        ; 0.0, and the other lanes are taken from %a. The assertions below expect a
        ; single vfnmadd213ss with %k1 and the {z} modifier.
    317 ; CHECK-LABEL: combine_scalar_maskz_fnmadd_32:
    318 ; CHECK:       # %bb.0: # %entry
    319 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    320 ; CHECK-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
    321 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
    322 ; CHECK-NEXT:    retq # encoding: [0xc3]
    323 entry:
    324   %0 = bitcast <2 x double> %a to <4 x float>
    325   %1 = bitcast <2 x double> %b to <4 x float>
    326   %2 = bitcast <2 x double> %c to <4 x float>
    327   %3 = extractelement <4 x float> %0, i64 0
    328   %4 = extractelement <4 x float> %1, i64 0
    329   %5 = extractelement <4 x float> %2, i64 0
    330   %6 = fmul fast float %4, %3
    331   %7 = fsub fast float %5, %6
    332   %8 = bitcast i8 %k to <8 x i1>
    333   %9 = extractelement <8 x i1> %8, i64 0
    334   %10 = select i1 %9, float %7, float 0.000000e+00 ; mask element clear: lane 0 becomes 0.0
    335   %11 = insertelement <4 x float> %0, float %10, i64 0 ; other lanes come from %a
    336   %12 = bitcast <4 x float> %11 to <2 x double>
    337   ret <2 x double> %12
    338 }
    339 
    340 define <2 x double> @combine_scalar_maskz_fnmadd_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
        ; Zero-masked scalar f64 negated multiply-add spelled out in plain IR.
        ; Element 0 of the result is c0 - (b0 * a0) when element 0 of %k (viewed as
        ; <8 x i1>) is set, otherwise 0.0; element 1 is taken from %a. The
        ; assertions below expect a single vfnmadd213sd with %k1 and the {z}
        ; modifier.
    341 ; CHECK-LABEL: combine_scalar_maskz_fnmadd_64:
    342 ; CHECK:       # %bb.0: # %entry
    343 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    344 ; CHECK-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
    345 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
    346 ; CHECK-NEXT:    retq # encoding: [0xc3]
    347 entry:
    348   %0 = extractelement <2 x double> %a, i64 0
    349   %1 = extractelement <2 x double> %b, i64 0
    350   %2 = extractelement <2 x double> %c, i64 0
    351   %3 = fmul fast double %1, %0
    352   %4 = fsub fast double %2, %3
    353   %5 = bitcast i8 %k to <8 x i1>
    354   %6 = extractelement <8 x i1> %5, i64 0
    355   %7 = select i1 %6, double %4, double 0.000000e+00 ; mask element clear: element 0 becomes 0.0
    356   %8 = insertelement <2 x double> %a, double %7, i64 0 ; element 1 comes from %a
    357   ret <2 x double> %8
    358 }
    359 
    360 define <2 x double> @combine_scalar_mask3_fnmadd_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
        ; Scalar f32 negated multiply-add whose passthrough is the addend. The
        ; <2 x double> operands are bitcast to <4 x float>; lane 0 of the result is
        ; c0 - (b0 * a0) when element 0 of %k (viewed as <8 x i1>) is set, otherwise
        ; c0, and the other lanes are taken from %c. The assertions below expect a
        ; vfnmadd231ss predicated on %k1 that writes xmm2, followed by a vmovaps of
        ; xmm2 into xmm0.
    361 ; CHECK-LABEL: combine_scalar_mask3_fnmadd_32:
    362 ; CHECK:       # %bb.0: # %entry
    363 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    364 ; CHECK-NEXT:    vfnmadd231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbd,0xd0]
    365 ; CHECK-NEXT:    # xmm2 = -(xmm1 * xmm0) + xmm2
    366 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
    367 ; CHECK-NEXT:    retq # encoding: [0xc3]
    368 entry:
    369   %0 = bitcast <2 x double> %a to <4 x float>
    370   %1 = bitcast <2 x double> %b to <4 x float>
    371   %2 = bitcast <2 x double> %c to <4 x float>
    372   %3 = extractelement <4 x float> %0, i64 0
    373   %4 = extractelement <4 x float> %1, i64 0
    374   %5 = extractelement <4 x float> %2, i64 0
    375   %6 = fmul fast float %4, %3
    376   %7 = fsub fast float %5, %6
    377   %8 = bitcast i8 %k to <8 x i1>
    378   %9 = extractelement <8 x i1> %8, i64 0
    379   %10 = select i1 %9, float %7, float %5 ; mask element clear: keep c0
    380   %11 = insertelement <4 x float> %2, float %10, i64 0 ; other lanes come from %c
    381   %12 = bitcast <4 x float> %11 to <2 x double>
    382   ret <2 x double> %12
    383 }
    384 
    385 define <2 x double> @combine_scalar_mask3_fnmadd_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
        ; Scalar f64 negated multiply-add whose passthrough is the addend. Element 0
        ; of the result is c0 - (b0 * a0) when element 0 of %k (viewed as <8 x i1>)
        ; is set, otherwise c0; element 1 is taken from %c. The assertions below
        ; expect a vfnmadd231sd predicated on %k1 that writes xmm2, followed by a
        ; vmovapd of xmm2 into xmm0.
    386 ; CHECK-LABEL: combine_scalar_mask3_fnmadd_64:
    387 ; CHECK:       # %bb.0: # %entry
    388 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    389 ; CHECK-NEXT:    vfnmadd231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbd,0xd0]
    390 ; CHECK-NEXT:    # xmm2 = -(xmm1 * xmm0) + xmm2
    391 ; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
    392 ; CHECK-NEXT:    retq # encoding: [0xc3]
    393 entry:
    394   %0 = extractelement <2 x double> %a, i64 0
    395   %1 = extractelement <2 x double> %b, i64 0
    396   %2 = extractelement <2 x double> %c, i64 0
    397   %3 = fmul fast double %1, %0
    398   %4 = fsub fast double %2, %3
    399   %5 = bitcast i8 %k to <8 x i1>
    400   %6 = extractelement <8 x i1> %5, i64 0
    401   %7 = select i1 %6, double %4, double %2 ; mask element clear: keep c0
    402   %8 = insertelement <2 x double> %c, double %7, i64 0 ; element 1 comes from %c
    403   ret <2 x double> %8
    404 }
    405 
    406 define <2 x double> @combine_scalar_mask_fnmsub_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
        ; Merge-masked scalar f32 negated multiply-subtract spelled out in plain IR.
        ; The <2 x double> operands are bitcast to <4 x float>; lane 0 of the result
        ; is (-0.0 - c0) - (b0 * a0) when element 0 of %k (viewed as <8 x i1>) is
        ; set, otherwise a0, and the other lanes are taken from %a. The assertions
        ; below expect a single vfnmsub213ss predicated on %k1.
    407 ; CHECK-LABEL: combine_scalar_mask_fnmsub_f32:
    408 ; CHECK:       # %bb.0: # %entry
    409 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    410 ; CHECK-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
    411 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
    412 ; CHECK-NEXT:    retq # encoding: [0xc3]
    413 entry:
    414   %0 = bitcast <2 x double> %a to <4 x float>
    415   %1 = bitcast <2 x double> %b to <4 x float>
    416   %2 = bitcast <2 x double> %c to <4 x float>
    417   %3 = extractelement <4 x float> %0, i64 0
    418   %4 = extractelement <4 x float> %1, i64 0
    419   %5 = extractelement <4 x float> %2, i64 0
    420   %sub = fsub fast float -0.000000e+00, %5 ; negate c0 by subtracting it from -0.0
    421   %6 = fmul fast float %4, %3
    422   %7 = fsub fast float %sub, %6
    423   %8 = bitcast i8 %k to <8 x i1>
    424   %9 = extractelement <8 x i1> %8, i64 0
    425   %10 = select i1 %9, float %7, float %3 ; mask element clear: keep a0
    426   %11 = insertelement <4 x float> %0, float %10, i64 0 ; other lanes come from %a
    427   %12 = bitcast <4 x float> %11 to <2 x double>
    428   ret <2 x double> %12
    429 }
    430 
    431 define <2 x double> @combine_scalar_mask_fnmsub_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
        ; Merge-masked scalar f64 negated multiply-subtract spelled out in plain IR.
        ; Element 0 of the result is (-0.0 - c0) - (b0 * a0) when element 0 of %k
        ; (viewed as <8 x i1>) is set, otherwise a0; element 1 is taken from %a. The
        ; assertions below expect a single vfnmsub213sd predicated on %k1.
    432 ; CHECK-LABEL: combine_scalar_mask_fnmsub_f64:
    433 ; CHECK:       # %bb.0: # %entry
    434 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    435 ; CHECK-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
    436 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
    437 ; CHECK-NEXT:    retq # encoding: [0xc3]
    438 entry:
    439   %0 = extractelement <2 x double> %a, i64 0
    440   %1 = extractelement <2 x double> %b, i64 0
    441   %2 = extractelement <2 x double> %c, i64 0
    442   %sub = fsub fast double -0.000000e+00, %2 ; negate c0 by subtracting it from -0.0
    443   %3 = fmul fast double %1, %0
    444   %4 = fsub fast double %sub, %3
    445   %5 = bitcast i8 %k to <8 x i1>
    446   %6 = extractelement <8 x i1> %5, i64 0
    447   %7 = select i1 %6, double %4, double %0 ; mask element clear: keep a0
    448   %8 = insertelement <2 x double> %a, double %7, i64 0 ; element 1 comes from %a
    449   ret <2 x double> %8
    450 }
    451 
    452 define <2 x double> @combine_scalar_maskz_fnmsub_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
        ; Zero-masked scalar f32 negated multiply-subtract spelled out in plain IR.
        ; The <2 x double> operands are bitcast to <4 x float>; lane 0 of the result
        ; is (-0.0 - c0) - (b0 * a0) when element 0 of %k (viewed as <8 x i1>) is
        ; set, otherwise 0.0, and the other lanes are taken from %a. The assertions
        ; below expect a single vfnmsub213ss with %k1 and the {z} modifier.
    453 ; CHECK-LABEL: combine_scalar_maskz_fnmsub_32:
    454 ; CHECK:       # %bb.0: # %entry
    455 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    456 ; CHECK-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
    457 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
    458 ; CHECK-NEXT:    retq # encoding: [0xc3]
    459 entry:
    460   %0 = bitcast <2 x double> %a to <4 x float>
    461   %1 = bitcast <2 x double> %b to <4 x float>
    462   %2 = bitcast <2 x double> %c to <4 x float>
    463   %3 = extractelement <4 x float> %0, i64 0
    464   %4 = extractelement <4 x float> %1, i64 0
    465   %5 = extractelement <4 x float> %2, i64 0
    466   %sub = fsub fast float -0.000000e+00, %5 ; negate c0 by subtracting it from -0.0
    467   %6 = fmul fast float %4, %3
    468   %7 = fsub fast float %sub, %6
    469   %8 = bitcast i8 %k to <8 x i1>
    470   %9 = extractelement <8 x i1> %8, i64 0
    471   %10 = select i1 %9, float %7, float 0.000000e+00 ; mask element clear: lane 0 becomes 0.0
    472   %11 = insertelement <4 x float> %0, float %10, i64 0 ; other lanes come from %a
    473   %12 = bitcast <4 x float> %11 to <2 x double>
    474   ret <2 x double> %12
    475 }
    476 
    477 define <2 x double> @combine_scalar_maskz_fnmsub_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
        ; Zero-masked scalar f64 negated multiply-subtract spelled out in plain IR.
        ; Element 0 of the result is (-0.0 - c0) - (b0 * a0) when element 0 of %k
        ; (viewed as <8 x i1>) is set, otherwise 0.0; element 1 is taken from %a.
        ; The assertions below expect a single vfnmsub213sd with %k1 and the {z}
        ; modifier.
    478 ; CHECK-LABEL: combine_scalar_maskz_fnmsub_64:
    479 ; CHECK:       # %bb.0: # %entry
    480 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    481 ; CHECK-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
    482 ; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
    483 ; CHECK-NEXT:    retq # encoding: [0xc3]
    484 entry:
    485   %0 = extractelement <2 x double> %a, i64 0
    486   %1 = extractelement <2 x double> %b, i64 0
    487   %2 = extractelement <2 x double> %c, i64 0
    488   %sub = fsub fast double -0.000000e+00, %2 ; negate c0 by subtracting it from -0.0
    489   %3 = fmul fast double %1, %0
    490   %4 = fsub fast double %sub, %3
    491   %5 = bitcast i8 %k to <8 x i1>
    492   %6 = extractelement <8 x i1> %5, i64 0
    493   %7 = select i1 %6, double %4, double 0.000000e+00 ; mask element clear: element 0 becomes 0.0
    494   %8 = insertelement <2 x double> %a, double %7, i64 0 ; element 1 comes from %a
    495   ret <2 x double> %8
    496 }
    497 
    498 define <2 x double> @combine_scalar_mask3_fnmsub_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
        ; Scalar f32 negated multiply-subtract whose passthrough is the subtrahend.
        ; The <2 x double> operands are bitcast to <4 x float>; lane 0 of the result
        ; is (-0.0 - c0) - (b0 * a0) when element 0 of %k (viewed as <8 x i1>) is
        ; set, otherwise c0, and the other lanes are taken from %c. The assertions
        ; below expect a vfnmsub231ss predicated on %k1 that writes xmm2, followed
        ; by a vmovaps of xmm2 into xmm0.
    499 ; CHECK-LABEL: combine_scalar_mask3_fnmsub_32:
    500 ; CHECK:       # %bb.0: # %entry
    501 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    502 ; CHECK-NEXT:    vfnmsub231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbf,0xd0]
    503 ; CHECK-NEXT:    # xmm2 = -(xmm1 * xmm0) - xmm2
    504 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
    505 ; CHECK-NEXT:    retq # encoding: [0xc3]
    506 entry:
    507   %0 = bitcast <2 x double> %a to <4 x float>
    508   %1 = bitcast <2 x double> %b to <4 x float>
    509   %2 = bitcast <2 x double> %c to <4 x float>
    510   %3 = extractelement <4 x float> %0, i64 0
    511   %4 = extractelement <4 x float> %1, i64 0
    512   %5 = extractelement <4 x float> %2, i64 0
    513   %sub = fsub fast float -0.000000e+00, %5 ; negate c0 by subtracting it from -0.0
    514   %6 = fmul fast float %4, %3
    515   %7 = fsub fast float %sub, %6
    516   %8 = bitcast i8 %k to <8 x i1>
    517   %9 = extractelement <8 x i1> %8, i64 0
    518   %10 = select i1 %9, float %7, float %5 ; mask element clear: keep c0
    519   %11 = insertelement <4 x float> %2, float %10, i64 0 ; other lanes come from %c
    520   %12 = bitcast <4 x float> %11 to <2 x double>
    521   ret <2 x double> %12
    522 }
    523 
    524 define <2 x double> @combine_scalar_mask3_fnmsub_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
        ; Scalar f64 negated multiply-subtract whose passthrough is the subtrahend.
        ; Element 0 of the result is (-0.0 - c0) - (b0 * a0) when element 0 of %k
        ; (viewed as <8 x i1>) is set, otherwise c0; element 1 is taken from %c. The
        ; assertions below expect a vfnmsub231sd predicated on %k1 that writes xmm2,
        ; followed by a vmovapd of xmm2 into xmm0.
    525 ; CHECK-LABEL: combine_scalar_mask3_fnmsub_64:
    526 ; CHECK:       # %bb.0: # %entry
    527 ; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    528 ; CHECK-NEXT:    vfnmsub231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbf,0xd0]
    529 ; CHECK-NEXT:    # xmm2 = -(xmm1 * xmm0) - xmm2
    530 ; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
    531 ; CHECK-NEXT:    retq # encoding: [0xc3]
    532 entry:
    533   %0 = extractelement <2 x double> %a, i64 0
    534   %1 = extractelement <2 x double> %b, i64 0
    535   %2 = extractelement <2 x double> %c, i64 0
    536   %sub = fsub fast double -0.000000e+00, %2 ; negate c0 by subtracting it from -0.0
    537   %3 = fmul fast double %1, %0
    538   %4 = fsub fast double %sub, %3
    539   %5 = bitcast i8 %k to <8 x i1>
    540   %6 = extractelement <8 x i1> %5, i64 0
    541   %7 = select i1 %6, double %4, double %2 ; mask element clear: keep c0
    542   %8 = insertelement <2 x double> %c, double %7, i64 0 ; element 1 comes from %c
    543   ret <2 x double> %8
    544 }
    545