; X86 codegen test: loads feeding the vfmadd intrinsics should appear as memory operands of the emitted vfmadd instructions.
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s
      3 
      4 ; VFMADD
      5 define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
      6   ; CHECK: vfmaddss (%{{.*}})
      7   %x = load float , float *%a2
      8   %y = insertelement <4 x float> undef, float %x, i32 0
      9   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y)
     10   ret < 4 x float > %res
     11 }
     12 define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
     13   ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
     14   %x = load float , float *%a1
     15   %y = insertelement <4 x float> undef, float %x, i32 0
     16   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2)
     17   ret < 4 x float > %res
     18 }
     19 
     20 declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
     21 
     22 define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
     23   ; CHECK: vfmaddsd (%{{.*}})
     24   %x = load double , double *%a2
     25   %y = insertelement <2 x double> undef, double %x, i32 0
     26   %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y)
     27   ret < 2 x double > %res
     28 }
     29 define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
     30   ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
     31   %x = load double , double *%a1
     32   %y = insertelement <2 x double> undef, double %x, i32 0
     33   %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2)
     34   ret < 2 x double > %res
     35 }
     36 declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
     37 define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
     38   ; CHECK: vfmaddps (%{{.*}})
     39   %x = load <4 x float>, <4 x float>* %a2
     40   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x)
     41   ret < 4 x float > %res
     42 }
     43 define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
     44   ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
     45   %x = load <4 x float>, <4 x float>* %a1
     46   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2)
     47   ret < 4 x float > %res
     48 }
     49 declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
     50 
     51 ; To test execution dependency
     52 define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) {
     53   ; CHECK: vmovaps
     54   ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
     55   %x = load <4 x float>, <4 x float>* %a0
     56   %y = load <4 x float>, <4 x float>* %a1
     57   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2)
     58   ret < 4 x float > %res
     59 }
     60 
     61 define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
     62   ; CHECK: vfmaddpd (%{{.*}})
     63   %x = load <2 x double>, <2 x double>* %a2
     64   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x)
     65   ret < 2 x double > %res
     66 }
     67 define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
     68   ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
     69   %x = load <2 x double>, <2 x double>* %a1
     70   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2)
     71   ret < 2 x double > %res
     72 }
     73 declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
     74 
     75 ; To test execution dependency
     76 define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) {
     77   ; CHECK: vmovapd
     78   ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
     79   %x = load <2 x double>, <2 x double>* %a0
     80   %y = load <2 x double>, <2 x double>* %a1
     81   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2)
     82   ret < 2 x double > %res
     83 }
     84 
     85