Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic 2>&1 | FileCheck %s
        ; stderr is merged into the pipe with the portable "2>&1 |" form instead of
        ; the bash-only "|&" shorthand, so the command no longer depends on which
        ; shell executes it.
        ; The two check lines below require the word "fail" to show up once in the
        ; combined llc output and never again after that first match.
      2 ; CHECK: fail
      3 ; CHECK-NOT: fail
      4 
      5 declare float @test_f(float %f)
      6 declare double @test_d(double %f)
      7 declare <4 x float> @test_vf(<4 x float> %f)
      8 declare <2 x double> @test_vd(<2 x double> %f)
        ; The four test_* functions above are declared without a body. Each takes
        ; and returns a single value of one type: float, double, <4 x float> or
        ; <2 x double>. Every function defined in this file calls one of them
        ; before the operation it exercises.
      9 declare float @llvm.sqrt.f32(float)
     10 declare double @llvm.sqrt.f64(double)
        ; Generic square-root intrinsics, called by @foo (f32) and @doo (f64).
     11 
     12 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>)
        ; x86-specific SSE/SSE2/SSE3 intrinsics called by the functions in this file.
        ; Single-operand <4 x float> forms: rsqrt.ps (above), sqrt.ps and rcp.ps.
     13 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>)
     14 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)
        ; Two-operand <4 x float> forms; cmp.ps takes an additional i8 argument.
     15 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
     16 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
     17 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
     18 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>)
     19 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
     20 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
        ; <2 x double> counterparts: sqrt.pd is single-operand, the others take two
        ; vector operands, and cmp.pd takes an additional i8 argument.
     21 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>)
     22 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>)
     23 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>)
     24 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8)
     25 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>)
     26 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
     27 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)
     28 
     29 define float @foo(float %f) {
          ; Scalar float case. %f is passed to @test_f and then used again as the
          ; operand of the generic f32 sqrt intrinsic, so it is live across the
          ; call; the call's own result (%a) is never used.
          ; NOTE(review): presumably %f is spilled around the call and its reload
          ; is the fold candidate that the -print-failed-fuse-candidates run is
          ; probing; confirm against the llc output.
     30   %a = call float @test_f(float %f)
     31   %t = call float @llvm.sqrt.f32(float %f)
     32   ret float %t
     33 }
     34 define double @doo(double %f) {
          ; Scalar double case. %f is passed to @test_d and then used again as the
          ; operand of the generic f64 sqrt intrinsic, so it is live across the
          ; call; the call's own result (%a) is never used.
          ; NOTE(review): presumably the reload of %f after the call is the fold
          ; candidate being exercised; confirm against the llc output.
     35   %a = call double @test_d(double %f)
     36   %t = call double @llvm.sqrt.f64(double %f)
     37   ret double %t
     38 }
     39 define <4 x float> @a0(<4 x float> %f) {
          ; Single-operand <4 x float> case using rsqrt.ps. %f stays live across
          ; the call to @test_vf (whose result %a is unused) and is then the only
          ; operand of the intrinsic.
     40   %a = call <4 x float> @test_vf(<4 x float> %f)
     41   %t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)
     42   ret <4 x float> %t
     43 }
     44 define <4 x float> @a1(<4 x float> %f) {
          ; Single-operand <4 x float> case using sqrt.ps. %f stays live across
          ; the call to @test_vf (whose result %a is unused) and is then the only
          ; operand of the intrinsic.
     45   %a = call <4 x float> @test_vf(<4 x float> %f)
     46   %t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)
     47   ret <4 x float> %t
     48 }
     49 define <4 x float> @a2(<4 x float> %f) {
          ; Single-operand <4 x float> case using rcp.ps. %f stays live across
          ; the call to @test_vf (whose result %a is unused) and is then the only
          ; operand of the intrinsic.
     50   %a = call <4 x float> @test_vf(<4 x float> %f)
     51   %t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)
     52   ret <4 x float> %t
     53 }
     54 define <4 x float> @b3(<4 x float> %f) {
          ; Two-operand <4 x float> case using min.ps. The call result %y is the
          ; first operand; %f, which is live across the call, is the second.
     55   %y = call <4 x float> @test_vf(<4 x float> %f)
     56   %t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f)
     57   ret <4 x float> %t
     58 }
     59 define <4 x float> @b4(<4 x float> %f) {
          ; Two-operand <4 x float> case using max.ps. The call result %y is the
          ; first operand; %f, which is live across the call, is the second.
     60   %y = call <4 x float> @test_vf(<4 x float> %f)
     61   %t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f)
     62   ret <4 x float> %t
     63 }
     64 define <4 x float> @b5(<4 x float> %f) {
          ; Two-operand <4 x float> case using cmp.ps. The call result %y is the
          ; first operand; %f, which is live across the call, is the second; the
          ; third argument is the constant i8 7.
     65   %y = call <4 x float> @test_vf(<4 x float> %f)
     66   %t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7)
     67   ret <4 x float> %t
     68 }
     69 define <4 x float> @b6(<4 x float> %f) {
          ; Two-operand <4 x float> case using the SSE3 addsub.ps intrinsic. The
          ; call result %y is the first operand; %f, which is live across the
          ; call, is the second.
     70   %y = call <4 x float> @test_vf(<4 x float> %f)
     71   %t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f)
     72   ret <4 x float> %t
     73 }
     74 define <4 x float> @b7(<4 x float> %f) {
          ; Two-operand <4 x float> case using the SSE3 hadd.ps intrinsic. The
          ; call result %y is the first operand; %f, which is live across the
          ; call, is the second.
     75   %y = call <4 x float> @test_vf(<4 x float> %f)
     76   %t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f)
     77   ret <4 x float> %t
     78 }
     79 define <4 x float> @b8(<4 x float> %f) {
          ; Two-operand <4 x float> case using the SSE3 hsub.ps intrinsic. The
          ; call result %y is the first operand; %f, which is live across the
          ; call, is the second.
     80   %y = call <4 x float> @test_vf(<4 x float> %f)
     81   %t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f)
     82   ret <4 x float> %t
     83 }
     84 define <2 x double> @c1(<2 x double> %f) {
          ; Single-operand <2 x double> case using sqrt.pd. %f stays live across
          ; the call to @test_vd (whose result %a is unused) and is then the only
          ; operand of the intrinsic.
     85   %a = call <2 x double> @test_vd(<2 x double> %f)
     86   %t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)
     87   ret <2 x double> %t
     88 }
     89 define <2 x double> @d3(<2 x double> %f) {
          ; Two-operand <2 x double> case using min.pd. The call result %y is the
          ; first operand; %f, which is live across the call, is the second.
     90   %y = call <2 x double> @test_vd(<2 x double> %f)
     91   %t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f)
     92   ret <2 x double> %t
     93 }
     94 define <2 x double> @d4(<2 x double> %f) {
          ; Two-operand <2 x double> case using max.pd. The call result %y is the
          ; first operand; %f, which is live across the call, is the second.
     95   %y = call <2 x double> @test_vd(<2 x double> %f)
     96   %t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %y, <2 x double> %f)
     97   ret <2 x double> %t
     98 }
     99 define <2 x double> @d5(<2 x double> %f) {
          ; Two-operand <2 x double> case using cmp.pd. The call result %y is the
          ; first operand; %f, which is live across the call, is the second; the
          ; third argument is the constant i8 7.
    100   %y = call <2 x double> @test_vd(<2 x double> %f)
    101   %t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7)
    102   ret <2 x double> %t
    103 }
    104 define <2 x double> @d6(<2 x double> %f) {
          ; Two-operand <2 x double> case using the SSE3 addsub.pd intrinsic. The
          ; call result %y is the first operand; %f, which is live across the
          ; call, is the second.
    105   %y = call <2 x double> @test_vd(<2 x double> %f)
    106   %t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f)
    107   ret <2 x double> %t
    108 }
    109 define <2 x double> @d7(<2 x double> %f) {
          ; Two-operand <2 x double> case using the SSE3 hadd.pd intrinsic. The
          ; call result %y is the first operand; %f, which is live across the
          ; call, is the second.
    110   %y = call <2 x double> @test_vd(<2 x double> %f)
    111   %t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f)
    112   ret <2 x double> %t
    113 }
    114 define <2 x double> @d8(<2 x double> %f) {
          ; Two-operand <2 x double> case using the SSE3 hsub.pd intrinsic. The
          ; call result %y is the first operand; %f, which is live across the
          ; call, is the second. @z0 uses the same intrinsic with the two
          ; operands swapped.
    115   %y = call <2 x double> @test_vd(<2 x double> %f)
    116   %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f)
    117   ret <2 x double> %t
    118 }
    119 
    120 ; This one should fail to fuse, but -regalloc=greedy isn't even trying. Instead
    121 ; it produces:
    122 ;   callq	test_vd
    123 ;   movapd	(%rsp), %xmm1           # 16-byte Reload
    124 ;   hsubpd	%xmm0, %xmm1
    125 ;   movapd	%xmm1, %xmm0
    126 ;   addq	$24, %rsp
    127 ;   ret
    128 ; RABasic still tries to fold this one.
    129 
    130 define <2 x double> @z0(<2 x double> %f) {
          ; Same hsub.pd intrinsic as @d8, but with the operands swapped: %f, which
          ; is live across the call, is now the FIRST operand and the call result
          ; %y is the second. According to the comment preceding this function,
          ; this is the case that is expected to fail to fuse.
          ; NOTE(review): presumably this function accounts for the single "fail"
          ; the file's check lines expect; confirm against the llc output.
    131   %y = call <2 x double> @test_vd(<2 x double> %f)
    132   %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
    133   ret <2 x double> %t
    134 }
    135