Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
        ; Each function in this file feeds two scalars loaded from memory to one of
        ; the scalar x86 FMA intrinsics declared below and pins the exact instruction
        ; sequence llc is expected to emit (see the FileCheck directives per function).
      2 
        ; Attribute group referenced by every function visible in this file.
      3 attributes #0 = { nounwind }
      4 
        ; Single-precision scalar variants: three <4 x float> operands each.
      5 declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
      6 declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
      7 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
      8 declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
      9 
        ; Double-precision scalar variants: three <2 x double> operands each.
     10 declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
     11 declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
     12 declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
     13 declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
     14 
     15 define void @fmadd_aab_ss(float* %a, float* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfmadd.ss called as (a, a, b), where
        ; a = *%a and b = *%b. Nominally a*a + b in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfmadd213ss that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a scalar store back to *%a.
     16 ; CHECK-LABEL: fmadd_aab_ss:
     17 ; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
     18 ; CHECK-NEXT: vfmadd213ss (%rdx), %[[XMM]], %[[XMM]]
     19 ; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
     20 ; CHECK-NEXT: ret
          ; %av = <a, 0.0, 0.0, 0.0>: the loaded scalar in element 0, zeros above it.
     21   %a.val = load float, float* %a
     22   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
     23   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
     24   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
     25   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
     26 
          ; %bv = <b, 0.0, 0.0, 0.0>, built the same way from *%b.
     27   %b.val = load float, float* %b
     28   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
     29   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
     30   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
     31   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
     32 
          ; %av is passed as both the first and second operand; %bv is the third.
     33   %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
     34 
          ; Only element 0 of the result is kept and written back through %a.
     35   %sr = extractelement <4 x float> %vr, i32 0
     36   store float %sr, float* %a
     37   ret void
     38 }
     39 
     40 define void @fmadd_aba_ss(float* %a, float* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfmadd.ss called as (a, b, a), where
        ; a = *%a and b = *%b. Nominally a*b + a in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfmadd132ss that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a scalar store back to *%a.
     41 ; CHECK-LABEL: fmadd_aba_ss:
     42 ; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
     43 ; CHECK-NEXT: vfmadd132ss (%rdx), %[[XMM]], %[[XMM]]
     44 ; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
     45 ; CHECK-NEXT: ret
          ; %av = <a, 0.0, 0.0, 0.0>: the loaded scalar in element 0, zeros above it.
     46   %a.val = load float, float* %a
     47   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
     48   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
     49   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
     50   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
     51 
          ; %bv = <b, 0.0, 0.0, 0.0>, built the same way from *%b.
     52   %b.val = load float, float* %b
     53   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
     54   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
     55   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
     56   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
     57 
          ; %av is passed as both the first and third operand; %bv is the second.
     58   %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
     59 
          ; Only element 0 of the result is kept and written back through %a.
     60   %sr = extractelement <4 x float> %vr, i32 0
     61   store float %sr, float* %a
     62   ret void
     63 }
     64 
     65 define void @fmsub_aab_ss(float* %a, float* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfmsub.ss called as (a, a, b), where
        ; a = *%a and b = *%b. Nominally a*a - b in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfmsub213ss that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a scalar store back to *%a.
     66 ; CHECK-LABEL: fmsub_aab_ss:
     67 ; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
     68 ; CHECK-NEXT: vfmsub213ss (%rdx), %[[XMM]], %[[XMM]]
     69 ; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
     70 ; CHECK-NEXT: ret
          ; %av = <a, 0.0, 0.0, 0.0>: the loaded scalar in element 0, zeros above it.
     71   %a.val = load float, float* %a
     72   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
     73   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
     74   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
     75   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
     76 
          ; %bv = <b, 0.0, 0.0, 0.0>, built the same way from *%b.
     77   %b.val = load float, float* %b
     78   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
     79   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
     80   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
     81   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
     82 
          ; %av is passed as both the first and second operand; %bv is the third.
     83   %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
     84 
          ; Only element 0 of the result is kept and written back through %a.
     85   %sr = extractelement <4 x float> %vr, i32 0
     86   store float %sr, float* %a
     87   ret void
     88 }
     89 
     90 define void @fmsub_aba_ss(float* %a, float* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfmsub.ss called as (a, b, a), where
        ; a = *%a and b = *%b. Nominally a*b - a in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfmsub132ss that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a scalar store back to *%a.
     91 ; CHECK-LABEL: fmsub_aba_ss:
     92 ; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
     93 ; CHECK-NEXT: vfmsub132ss (%rdx), %[[XMM]], %[[XMM]]
     94 ; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
     95 ; CHECK-NEXT: ret
          ; %av = <a, 0.0, 0.0, 0.0>: the loaded scalar in element 0, zeros above it.
     96   %a.val = load float, float* %a
     97   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
     98   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
     99   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
    100   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
    101 
          ; %bv = <b, 0.0, 0.0, 0.0>, built the same way from *%b.
    102   %b.val = load float, float* %b
    103   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
    104   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
    105   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
    106   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
    107 
          ; %av is passed as both the first and third operand; %bv is the second.
    108   %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
    109 
          ; Only element 0 of the result is kept and written back through %a.
    110   %sr = extractelement <4 x float> %vr, i32 0
    111   store float %sr, float* %a
    112   ret void
    113 }
    114 
    115 define void @fnmadd_aab_ss(float* %a, float* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfnmadd.ss called as (a, a, b), where
        ; a = *%a and b = *%b. Nominally -(a*a) + b in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfnmadd213ss that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a scalar store back to *%a.
    116 ; CHECK-LABEL: fnmadd_aab_ss:
    117 ; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
    118 ; CHECK-NEXT: vfnmadd213ss (%rdx), %[[XMM]], %[[XMM]]
    119 ; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
    120 ; CHECK-NEXT: ret
          ; %av = <a, 0.0, 0.0, 0.0>: the loaded scalar in element 0, zeros above it.
    121   %a.val = load float, float* %a
    122   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
    123   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
    124   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
    125   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
    126 
          ; %bv = <b, 0.0, 0.0, 0.0>, built the same way from *%b.
    127   %b.val = load float, float* %b
    128   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
    129   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
    130   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
    131   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
    132 
          ; %av is passed as both the first and second operand; %bv is the third.
    133   %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
    134 
          ; Only element 0 of the result is kept and written back through %a.
    135   %sr = extractelement <4 x float> %vr, i32 0
    136   store float %sr, float* %a
    137   ret void
    138 }
    139 
    140 define void @fnmadd_aba_ss(float* %a, float* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfnmadd.ss called as (a, b, a), where
        ; a = *%a and b = *%b. Nominally -(a*b) + a in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfnmadd132ss that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a scalar store back to *%a.
    141 ; CHECK-LABEL: fnmadd_aba_ss:
    142 ; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
    143 ; CHECK-NEXT: vfnmadd132ss (%rdx), %[[XMM]], %[[XMM]]
    144 ; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
    145 ; CHECK-NEXT: ret
          ; %av = <a, 0.0, 0.0, 0.0>: the loaded scalar in element 0, zeros above it.
    146   %a.val = load float, float* %a
    147   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
    148   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
    149   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
    150   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
    151 
          ; %bv = <b, 0.0, 0.0, 0.0>, built the same way from *%b.
    152   %b.val = load float, float* %b
    153   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
    154   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
    155   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
    156   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
    157 
          ; %av is passed as both the first and third operand; %bv is the second.
    158   %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
    159 
          ; Only element 0 of the result is kept and written back through %a.
    160   %sr = extractelement <4 x float> %vr, i32 0
    161   store float %sr, float* %a
    162   ret void
    163 }
    164 
    165 define void @fnmsub_aab_ss(float* %a, float* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfnmsub.ss called as (a, a, b), where
        ; a = *%a and b = *%b. Nominally -(a*a) - b in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfnmsub213ss that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a scalar store back to *%a.
    166 ; CHECK-LABEL: fnmsub_aab_ss:
    167 ; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
    168 ; CHECK-NEXT: vfnmsub213ss (%rdx), %[[XMM]], %[[XMM]]
    169 ; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
    170 ; CHECK-NEXT: ret
          ; %av = <a, 0.0, 0.0, 0.0>: the loaded scalar in element 0, zeros above it.
    171   %a.val = load float, float* %a
    172   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
    173   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
    174   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
    175   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
    176 
          ; %bv = <b, 0.0, 0.0, 0.0>, built the same way from *%b.
    177   %b.val = load float, float* %b
    178   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
    179   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
    180   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
    181   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
    182 
          ; %av is passed as both the first and second operand; %bv is the third.
    183   %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
    184 
          ; Only element 0 of the result is kept and written back through %a.
    185   %sr = extractelement <4 x float> %vr, i32 0
    186   store float %sr, float* %a
    187   ret void
    188 }
    189 
    190 define void @fnmsub_aba_ss(float* %a, float* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfnmsub.ss called as (a, b, a), where
        ; a = *%a and b = *%b. Nominally -(a*b) - a in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfnmsub132ss that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a scalar store back to *%a.
    191 ; CHECK-LABEL: fnmsub_aba_ss:
    192 ; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
    193 ; CHECK-NEXT: vfnmsub132ss (%rdx), %[[XMM]], %[[XMM]]
    194 ; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
    195 ; CHECK-NEXT: ret
          ; %av = <a, 0.0, 0.0, 0.0>: the loaded scalar in element 0, zeros above it.
    196   %a.val = load float, float* %a
    197   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
    198   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
    199   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
    200   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
    201 
          ; %bv = <b, 0.0, 0.0, 0.0>, built the same way from *%b.
    202   %b.val = load float, float* %b
    203   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
    204   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
    205   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
    206   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
    207 
          ; %av is passed as both the first and third operand; %bv is the second.
    208   %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
    209 
          ; Only element 0 of the result is kept and written back through %a.
    210   %sr = extractelement <4 x float> %vr, i32 0
    211   store float %sr, float* %a
    212   ret void
    213 }
    214 
    215 define void @fmadd_aab_sd(double* %a, double* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfmadd.sd called as (a, a, b), where
        ; a = *%a and b = *%b. Nominally a*a + b in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfmadd213sd that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a vmovlpd store back to *%a.
    216 ; CHECK-LABEL: fmadd_aab_sd:
    217 ; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
    218 ; CHECK-NEXT: vfmadd213sd (%rdx), %[[XMM]], %[[XMM]]
    219 ; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
    220 ; CHECK-NEXT: ret
          ; %av = <a, 0.0>: the loaded scalar in element 0, zero in element 1.
    221   %a.val = load double, double* %a
    222   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
    223   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
    224 
          ; %bv = <b, 0.0>, built the same way from *%b.
    225   %b.val = load double, double* %b
    226   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
    227   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
    228 
          ; %av is passed as both the first and second operand; %bv is the third.
    229   %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
    230 
          ; Only element 0 of the result is kept and written back through %a.
    231   %sr = extractelement <2 x double> %vr, i32 0
    232   store double %sr, double* %a
    233   ret void
    234 }
    235 
    236 define void @fmadd_aba_sd(double* %a, double* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfmadd.sd called as (a, b, a), where
        ; a = *%a and b = *%b. Nominally a*b + a in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfmadd132sd that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a vmovlpd store back to *%a.
    237 ; CHECK-LABEL: fmadd_aba_sd:
    238 ; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
    239 ; CHECK-NEXT: vfmadd132sd (%rdx), %[[XMM]], %[[XMM]]
    240 ; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
    241 ; CHECK-NEXT: ret
          ; %av = <a, 0.0>: the loaded scalar in element 0, zero in element 1.
    242   %a.val = load double, double* %a
    243   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
    244   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
    245 
          ; %bv = <b, 0.0>, built the same way from *%b.
    246   %b.val = load double, double* %b
    247   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
    248   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
    249 
          ; %av is passed as both the first and third operand; %bv is the second.
    250   %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
    251 
          ; Only element 0 of the result is kept and written back through %a.
    252   %sr = extractelement <2 x double> %vr, i32 0
    253   store double %sr, double* %a
    254   ret void
    255 }
    256 
    257 define void @fmsub_aab_sd(double* %a, double* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfmsub.sd called as (a, a, b), where
        ; a = *%a and b = *%b. Nominally a*a - b in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfmsub213sd that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a vmovlpd store back to *%a.
    258 ; CHECK-LABEL: fmsub_aab_sd:
    259 ; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
    260 ; CHECK-NEXT: vfmsub213sd (%rdx), %[[XMM]], %[[XMM]]
    261 ; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
    262 ; CHECK-NEXT: ret
          ; %av = <a, 0.0>: the loaded scalar in element 0, zero in element 1.
    263   %a.val = load double, double* %a
    264   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
    265   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
    266 
          ; %bv = <b, 0.0>, built the same way from *%b.
    267   %b.val = load double, double* %b
    268   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
    269   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
    270 
          ; %av is passed as both the first and second operand; %bv is the third.
    271   %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
    272 
          ; Only element 0 of the result is kept and written back through %a.
    273   %sr = extractelement <2 x double> %vr, i32 0
    274   store double %sr, double* %a
    275   ret void
    276 }
    277 
    278 define void @fmsub_aba_sd(double* %a, double* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfmsub.sd called as (a, b, a), where
        ; a = *%a and b = *%b. Nominally a*b - a in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfmsub132sd that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a vmovlpd store back to *%a.
    279 ; CHECK-LABEL: fmsub_aba_sd:
    280 ; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
    281 ; CHECK-NEXT: vfmsub132sd (%rdx), %[[XMM]], %[[XMM]]
    282 ; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
    283 ; CHECK-NEXT: ret
          ; %av = <a, 0.0>: the loaded scalar in element 0, zero in element 1.
    284   %a.val = load double, double* %a
    285   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
    286   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
    287 
          ; %bv = <b, 0.0>, built the same way from *%b.
    288   %b.val = load double, double* %b
    289   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
    290   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
    291 
          ; %av is passed as both the first and third operand; %bv is the second.
    292   %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
    293 
          ; Only element 0 of the result is kept and written back through %a.
    294   %sr = extractelement <2 x double> %vr, i32 0
    295   store double %sr, double* %a
    296   ret void
    297 }
    298 
    299 define void @fnmadd_aab_sd(double* %a, double* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfnmadd.sd called as (a, a, b), where
        ; a = *%a and b = *%b. Nominally -(a*a) + b in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfnmadd213sd that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a vmovlpd store back to *%a.
    300 ; CHECK-LABEL: fnmadd_aab_sd:
    301 ; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
    302 ; CHECK-NEXT: vfnmadd213sd (%rdx), %[[XMM]], %[[XMM]]
    303 ; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
    304 ; CHECK-NEXT: ret
          ; %av = <a, 0.0>: the loaded scalar in element 0, zero in element 1.
    305   %a.val = load double, double* %a
    306   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
    307   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
    308 
          ; %bv = <b, 0.0>, built the same way from *%b.
    309   %b.val = load double, double* %b
    310   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
    311   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
    312 
          ; %av is passed as both the first and second operand; %bv is the third.
    313   %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
    314 
          ; Only element 0 of the result is kept and written back through %a.
    315   %sr = extractelement <2 x double> %vr, i32 0
    316   store double %sr, double* %a
    317   ret void
    318 }
    319 
    320 define void @fnmadd_aba_sd(double* %a, double* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfnmadd.sd called as (a, b, a), where
        ; a = *%a and b = *%b. Nominally -(a*b) + a in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfnmadd132sd that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a vmovlpd store back to *%a.
    321 ; CHECK-LABEL: fnmadd_aba_sd:
    322 ; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
    323 ; CHECK-NEXT: vfnmadd132sd (%rdx), %[[XMM]], %[[XMM]]
    324 ; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
    325 ; CHECK-NEXT: ret
          ; %av = <a, 0.0>: the loaded scalar in element 0, zero in element 1.
    326   %a.val = load double, double* %a
    327   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
    328   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
    329 
          ; %bv = <b, 0.0>, built the same way from *%b.
    330   %b.val = load double, double* %b
    331   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
    332   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
    333 
          ; %av is passed as both the first and third operand; %bv is the second.
    334   %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
    335 
          ; Only element 0 of the result is kept and written back through %a.
    336   %sr = extractelement <2 x double> %vr, i32 0
    337   store double %sr, double* %a
    338   ret void
    339 }
    340 
    341 define void @fnmsub_aab_sd(double* %a, double* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfnmsub.sd called as (a, a, b), where
        ; a = *%a and b = *%b. Nominally -(a*a) - b in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfnmsub213sd that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a vmovlpd store back to *%a.
    342 ; CHECK-LABEL: fnmsub_aab_sd:
    343 ; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
    344 ; CHECK-NEXT: vfnmsub213sd (%rdx), %[[XMM]], %[[XMM]]
    345 ; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
    346 ; CHECK-NEXT: ret
          ; %av = <a, 0.0>: the loaded scalar in element 0, zero in element 1.
    347   %a.val = load double, double* %a
    348   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
    349   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
    350 
          ; %bv = <b, 0.0>, built the same way from *%b.
    351   %b.val = load double, double* %b
    352   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
    353   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
    354 
          ; %av is passed as both the first and second operand; %bv is the third.
    355   %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
    356 
          ; Only element 0 of the result is kept and written back through %a.
    357   %sr = extractelement <2 x double> %vr, i32 0
    358   store double %sr, double* %a
    359   ret void
    360 }
    361 
    362 define void @fnmsub_aba_sd(double* %a, double* %b) #0 {
        ; Load-folding test for llvm.x86.fma.vfnmsub.sd called as (a, b, a), where
        ; a = *%a and b = *%b. Nominally -(a*b) - a in element 0 (per the Intel FMA
        ; naming; the intrinsic's semantics are not spelled out in this file).
        ; Expected assembly: load *%a (rcx) into one XMM register, a single
        ; vfnmsub132sd that reads *%b (rdx) as its memory operand and uses that same
        ; register for both register operands, then a vmovlpd store back to *%a.
    363 ; CHECK-LABEL: fnmsub_aba_sd:
    364 ; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
    365 ; CHECK-NEXT: vfnmsub132sd (%rdx), %[[XMM]], %[[XMM]]
    366 ; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
    367 ; CHECK-NEXT: ret
          ; %av = <a, 0.0>: the loaded scalar in element 0, zero in element 1.
    368   %a.val = load double, double* %a
    369   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
    370   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
    371 
          ; %bv = <b, 0.0>, built the same way from *%b.
    372   %b.val = load double, double* %b
    373   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
    374   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
    375 
          ; %av is passed as both the first and third operand; %bv is the second.
    376   %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
    377 
          ; Only element 0 of the result is kept and written back through %a.
    378   %sr = extractelement <2 x double> %vr, i32 0
    379   store double %sr, double* %a
    380   ret void
    381 }
    382 
    383 
    384