Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mattr=fma4 | FileCheck %s
      3 
      4 target triple = "x86_64-unknown-unknown"
      5 
        ; FMA4 scalar intrinsics called by the test functions in this file.
        ; The .ss form takes and returns <4 x float>; the .sd form takes and
        ; returns <2 x double>. Each takes three vector operands.
      6 declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
      7 declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
      8 
      9 define void @fmadd_aab_ss(float* %a, float* %b) {
        ; Single-precision case with intrinsic operand order (a, a, b).
        ; The assertions below expect one vmovss load into xmm0 and a vfmaddss
        ; that takes (%rsi) directly as a memory operand (presumably the %b
        ; pointer under the x86-64 calling convention -- confirm).
     10 ; CHECK-LABEL: fmadd_aab_ss:
     11 ; CHECK:       # %bb.0:
     12 ; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     13 ; CHECK-NEXT:    vfmaddss (%rsi), %xmm0, %xmm0, %xmm0
     14 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
     15 ; CHECK-NEXT:    retq
          ; Load the scalar at %a into lane 0 of a <4 x float>; lanes 1-3 are 0.0.
     16   %a.val = load float, float* %a
     17   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
     18   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
     19   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
     20   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
     21 
          ; Same construction for the scalar at %b.
     22   %b.val = load float, float* %b
     23   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
     24   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
     25   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
     26   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
     27 
          ; %av is passed as the first two operands, %bv as the third.
     28   %vr = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
     29 
          ; Only lane 0 of the result is used; it is stored back through %a.
     30   %sr = extractelement <4 x float> %vr, i32 0
     31   store float %sr, float* %a
     32   ret void
     33 }
     34 
     35 define void @fmadd_aba_ss(float* %a, float* %b) {
        ; Single-precision case with intrinsic operand order (a, b, a).
        ; The assertions below expect one vmovss load into xmm0 and a vfmaddss
        ; whose second listed operand is the memory reference (%rsi)
        ; (presumably the %b pointer under the x86-64 calling convention -- confirm).
     36 ; CHECK-LABEL: fmadd_aba_ss:
     37 ; CHECK:       # %bb.0:
     38 ; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     39 ; CHECK-NEXT:    vfmaddss %xmm0, (%rsi), %xmm0, %xmm0
     40 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
     41 ; CHECK-NEXT:    retq
          ; Load the scalar at %a into lane 0 of a <4 x float>; lanes 1-3 are 0.0.
     42   %a.val = load float, float* %a
     43   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
     44   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
     45   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
     46   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
     47 
          ; Same construction for the scalar at %b.
     48   %b.val = load float, float* %b
     49   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
     50   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
     51   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
     52   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
     53 
          ; %av is passed as the first and third operands, %bv as the second.
     54   %vr = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
     55 
          ; Only lane 0 of the result is used; it is stored back through %a.
     56   %sr = extractelement <4 x float> %vr, i32 0
     57   store float %sr, float* %a
     58   ret void
     59 }
     60 
     61 define void @fmadd_aab_sd(double* %a, double* %b) {
        ; Double-precision case with intrinsic operand order (a, a, b).
        ; The assertions below expect one vmovsd load into xmm0 and a vfmaddsd
        ; that takes (%rsi) directly as a memory operand (presumably the %b
        ; pointer under the x86-64 calling convention -- confirm).
     62 ; CHECK-LABEL: fmadd_aab_sd:
     63 ; CHECK:       # %bb.0:
     64 ; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
     65 ; CHECK-NEXT:    vfmaddsd (%rsi), %xmm0, %xmm0, %xmm0
     66 ; CHECK-NEXT:    vmovsd %xmm0, (%rdi)
     67 ; CHECK-NEXT:    retq
          ; Load the scalar at %a into lane 0 of a <2 x double>; lane 1 is 0.0.
     68   %a.val = load double, double* %a
     69   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
     70   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
     71 
          ; Same construction for the scalar at %b.
     72   %b.val = load double, double* %b
     73   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
     74   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
     75 
          ; %av is passed as the first two operands, %bv as the third.
     76   %vr = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
     77 
          ; Only lane 0 of the result is used; it is stored back through %a.
     78   %sr = extractelement <2 x double> %vr, i32 0
     79   store double %sr, double* %a
     80   ret void
     81 }
     82 
     83 define void @fmadd_aba_sd(double* %a, double* %b) {
        ; Double-precision case with intrinsic operand order (a, b, a).
        ; The assertions below expect one vmovsd load into xmm0 and a vfmaddsd
        ; whose second listed operand is the memory reference (%rsi)
        ; (presumably the %b pointer under the x86-64 calling convention -- confirm).
     84 ; CHECK-LABEL: fmadd_aba_sd:
     85 ; CHECK:       # %bb.0:
     86 ; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
     87 ; CHECK-NEXT:    vfmaddsd %xmm0, (%rsi), %xmm0, %xmm0
     88 ; CHECK-NEXT:    vmovsd %xmm0, (%rdi)
     89 ; CHECK-NEXT:    retq
          ; Load the scalar at %a into lane 0 of a <2 x double>; lane 1 is 0.0.
     90   %a.val = load double, double* %a
     91   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
     92   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
     93 
          ; Same construction for the scalar at %b.
     94   %b.val = load double, double* %b
     95   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
     96   %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
     97 
          ; %av is passed as the first and third operands, %bv as the second.
     98   %vr = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
     99 
          ; Only lane 0 of the result is used; it is stored back through %a.
    100   %sr = extractelement <2 x double> %vr, i32 0
    101   store double %sr, double* %a
    102   ret void
    103 }
    104 
    105