Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s
      4 
      5 ; VFMADD
      6 define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) { ; calls llvm.x86.fma4.vfmadd.ss with its third operand built from a float loaded via %a2
      7 ; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load:
      8 ; CHECK:       # %bb.0:
      9 ; CHECK-NEXT:    vfmaddss (%rdi), %xmm1, %xmm0, %xmm0
     10 ; CHECK-NEXT:    retq
     11   %x = load float , float *%a2 ; the CHECK lines expect no separate load instruction, only the (%rdi) memory operand of vfmaddss
     12   %y = insertelement <4 x float> undef, float %x, i32 0 ; loaded scalar goes into element 0; the remaining elements stay undef
     13   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; %y (from memory) is the third intrinsic operand
     14   ret < 4 x float > %res
     15 }
     16 define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) { ; same intrinsic as the _load variant, but here the second operand is built from a float loaded via %a1
     17 ; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load2:
     18 ; CHECK:       # %bb.0:
     19 ; CHECK-NEXT:    vfmaddss %xmm1, (%rdi), %xmm0, %xmm0
     20 ; CHECK-NEXT:    retq
     21   %x = load float , float *%a1 ; the CHECK lines expect no separate load instruction, only the (%rdi) memory operand of vfmaddss
     22   %y = insertelement <4 x float> undef, float %x, i32 0 ; loaded scalar goes into element 0; the remaining elements stay undef
     23   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; %y (from memory) is the second intrinsic operand
     24   ret < 4 x float > %res
     25 }
     26 
     27 declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone ; intrinsic called by test_x86_fma4_vfmadd_ss_load and test_x86_fma4_vfmadd_ss_load2
     28 
     29 define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) { ; calls llvm.x86.fma4.vfmadd.sd with its third operand built from a double loaded via %a2
     30 ; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load:
     31 ; CHECK:       # %bb.0:
     32 ; CHECK-NEXT:    vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0
     33 ; CHECK-NEXT:    retq
     34   %x = load double , double *%a2 ; the CHECK lines expect no separate load instruction, only the (%rdi) memory operand of vfmaddsd
     35   %y = insertelement <2 x double> undef, double %x, i32 0 ; loaded scalar goes into element 0; element 1 stays undef
     36   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; %y (from memory) is the third intrinsic operand
     37   ret < 2 x double > %res
     38 }
     39 define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) { ; same intrinsic as the _load variant, but here the second operand is built from a double loaded via %a1
     40 ; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load2:
     41 ; CHECK:       # %bb.0:
     42 ; CHECK-NEXT:    vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0
     43 ; CHECK-NEXT:    retq
     44   %x = load double , double *%a1 ; the CHECK lines expect no separate load instruction, only the (%rdi) memory operand of vfmaddsd
     45   %y = insertelement <2 x double> undef, double %x, i32 0 ; loaded scalar goes into element 0; element 1 stays undef
     46   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; %y (from memory) is the second intrinsic operand
     47   ret < 2 x double > %res
     48 }
     49 declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone ; intrinsic called by test_x86_fma4_vfmadd_sd_load and test_x86_fma4_vfmadd_sd_load2
     50 define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) { ; calls llvm.x86.fma.vfmadd.ps with its third operand loaded as a whole <4 x float> via %a2
     51 ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load:
     52 ; CHECK:       # %bb.0:
     53 ; CHECK-NEXT:    vfmaddps (%rdi), %xmm1, %xmm0, %xmm0
     54 ; CHECK-NEXT:    retq
     55   %x = load <4 x float>, <4 x float>* %a2 ; the CHECK lines expect no separate load instruction, only the (%rdi) memory operand of vfmaddps
     56   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; %x (from memory) is the third intrinsic operand
     57   ret < 4 x float > %res
     58 }
     59 define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) { ; same intrinsic as the _load variant, but here the second operand is loaded via %a1
     60 ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load2:
     61 ; CHECK:       # %bb.0:
     62 ; CHECK-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
     63 ; CHECK-NEXT:    retq
     64   %x = load <4 x float>, <4 x float>* %a1 ; the CHECK lines expect no separate load instruction, only the (%rdi) memory operand of vfmaddps
     65   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; %x (from memory) is the second intrinsic operand
     66   ret < 4 x float > %res
     67 }
     68 declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone ; intrinsic called by the test_x86_fma_vfmadd_ps_load, _load2 and _load3 tests in this file
     69 
     70 ; To test execution dependency
     71 define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) { ; calls llvm.x86.fma.vfmadd.ps with both the first and second operands loaded from memory
     72 ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load3:
     73 ; CHECK:       # %bb.0:
     74 ; CHECK-NEXT:    vmovaps (%rdi), %xmm1
     75 ; CHECK-NEXT:    vfmaddps %xmm0, (%rsi), %xmm1, %xmm0
     76 ; CHECK-NEXT:    retq
     77   %x = load <4 x float>, <4 x float>* %a0 ; the CHECK lines expect this load as a separate vmovaps from (%rdi) into %xmm1
     78   %y = load <4 x float>, <4 x float>* %a1 ; the CHECK lines expect this load only as the (%rsi) memory operand of vfmaddps
     79   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2) ; only %a2 arrives in a register
     80   ret < 4 x float > %res
     81 }
     82 
     83 define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) { ; calls llvm.x86.fma.vfmadd.pd with its third operand loaded as a whole <2 x double> via %a2
     84 ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load:
     85 ; CHECK:       # %bb.0:
     86 ; CHECK-NEXT:    vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0
     87 ; CHECK-NEXT:    retq
     88   %x = load <2 x double>, <2 x double>* %a2 ; the CHECK lines expect no separate load instruction, only the (%rdi) memory operand of vfmaddpd
     89   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; %x (from memory) is the third intrinsic operand
     90   ret < 2 x double > %res
     91 }
     92 define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) { ; same intrinsic as the _load variant, but here the second operand is loaded via %a1
     93 ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load2:
     94 ; CHECK:       # %bb.0:
     95 ; CHECK-NEXT:    vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0
     96 ; CHECK-NEXT:    retq
     97   %x = load <2 x double>, <2 x double>* %a1 ; the CHECK lines expect no separate load instruction, only the (%rdi) memory operand of vfmaddpd
     98   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; %x (from memory) is the second intrinsic operand
     99   ret < 2 x double > %res
    100 }
    101 declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone ; intrinsic called by the test_x86_fma_vfmadd_pd_load, _load2 and _load3 tests in this file
    102 
    103 ; To test execution dependency
    104 define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) { ; calls llvm.x86.fma.vfmadd.pd with both the first and second operands loaded from memory
    105 ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load3:
    106 ; CHECK:       # %bb.0:
    107 ; CHECK-NEXT:    vmovapd (%rdi), %xmm1
    108 ; CHECK-NEXT:    vfmaddpd %xmm0, (%rsi), %xmm1, %xmm0
    109 ; CHECK-NEXT:    retq
    110   %x = load <2 x double>, <2 x double>* %a0 ; the CHECK lines expect this load as a separate vmovapd from (%rdi) into %xmm1
    111   %y = load <2 x double>, <2 x double>* %a1 ; the CHECK lines expect this load only as the (%rsi) memory operand of vfmaddpd
    112   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2) ; only %a2 arrives in a register
    113   ret < 2 x double > %res
    114 }
    115 
    116