Home | History | Annotate | Download | only in BBVectorize
      1 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
        ; Test driver (next line): opt reads this file on stdin and is invoked with
        ; -bb-vectorize, -bb-vectorize-req-chain-depth=3, -instcombine, -gvn and -S;
        ; its output is piped to FileCheck, which matches it against the
        ; expected-output directives embedded in the comments of this file.
      2 ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
      3 
        ; Scalar (f64) intrinsics called by the test functions below: fma by
        ; @test1, cos by @test2, powi by @test3 and @test4.
      4 declare double @llvm.fma.f64(double, double, double)
      5 declare double @llvm.cos.f64(double)
      6 declare double @llvm.powi.f64(double, i32)
      7 
      8 ; Basic depth-3 chain with fma
        ; Two parallel scalar chains, fsub -> llvm.fma.f64 -> fadd, are built from
        ; (%A1, %B1, %C1) and (%A2, %B2, %C2); the two chain results are multiplied
        ; to produce the scalar return value.
      9 define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
        ; Step 1 of each chain: A - B.
     10 	%X1 = fsub double %A1, %B1
     11 	%X2 = fsub double %A2, %B2
        ; Step 2 of each chain: the intrinsic under test, fma(X, A, C).
     12 	%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
     13 	%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
        ; Step 3 of each chain: Y + B.
     14 	%Z1 = fadd double %Y1, %B1
     15 	%Z2 = fadd double %Y2, %B2
        ; The two chains meet here in a single scalar multiply.
     16 	%R  = fmul double %Z1, %Z2
     17 	ret double %R
        ; Expected output: the "1" operands are inserted at element 0 and the "2"
        ; operands at element 1 of <2 x double> vectors; fsub, the fma call (now
        ; @llvm.fma.v2f64) and fadd each operate on those vectors, reusing the
        ; packed A vector as the second fma operand and the packed B vector in the
        ; fadd. Both elements are then extracted for the final scalar fmul.
     18 ; CHECK: @test1
     19 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
     20 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
     21 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
     22 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
     23 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
     24 ; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
     25 ; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
     26 ; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
     27 ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
     28 ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
     29 ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
     30 ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
     31 ; CHECK: ret double %R
     32 }
     33 
     34 ; Basic depth-3 chain with cos
        ; Same shape as @test1, but the middle step of each chain is the
        ; single-operand intrinsic llvm.cos.f64, so only the A and B operands
        ; need to be packed into vectors.
     35 define double @test2(double %A1, double %A2, double %B1, double %B2) {
        ; Step 1 of each chain: A - B.
     36 	%X1 = fsub double %A1, %B1
     37 	%X2 = fsub double %A2, %B2
        ; Step 2 of each chain: the intrinsic under test, cos(X).
     38 	%Y1 = call double @llvm.cos.f64(double %X1)
     39 	%Y2 = call double @llvm.cos.f64(double %X2)
        ; Step 3 of each chain: Y + B.
     40 	%Z1 = fadd double %Y1, %B1
     41 	%Z2 = fadd double %Y2, %B2
        ; The two chains meet here in a single scalar multiply.
     42 	%R  = fmul double %Z1, %Z2
     43 	ret double %R
        ; Expected output: A and B operands packed into <2 x double> vectors, a
        ; vector fsub, one call to @llvm.cos.v2f64 on the fsub result, a vector
        ; fadd that reuses the packed B vector, then both elements extracted for
        ; the scalar fmul.
     44 ; CHECK: @test2
     45 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
     46 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
     47 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
     48 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
     49 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
     50 ; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
     51 ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
     52 ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
     53 ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
     54 ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
     55 ; CHECK: ret double %R
     56 }
     57 
     58 ; Basic depth-3 chain with powi
        ; Same shape as @test2, but the middle step is llvm.powi.f64, and both
        ; calls pass the same i32 exponent %P.
     59 define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
     60 
        ; Step 1 of each chain: A - B.
     61 	%X1 = fsub double %A1, %B1
     62 	%X2 = fsub double %A2, %B2
        ; Step 2 of each chain: the intrinsic under test, powi(X, P), with an
        ; identical exponent operand in both calls.
     63 	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
     64 	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
        ; Step 3 of each chain: Y + B.
     65 	%Z1 = fadd double %Y1, %B1
     66 	%Z2 = fadd double %Y2, %B2
        ; The two chains meet here in a single scalar multiply.
     67 	%R  = fmul double %Z1, %Z2
     68 	ret double %R
        ; Expected output: as in @test2, except that the middle step becomes one
        ; call to @llvm.powi.v2f64 taking the vector fsub result and the scalar
        ; exponent %P (the exponent operand stays an i32, it is not packed).
     69 ; CHECK: @test3
     70 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
     71 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
     72 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
     73 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
     74 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
     75 ; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
     76 ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
     77 ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
     78 ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
     79 ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
     80 ; CHECK: ret double %R
     81 }
     82 
     83 ; Basic depth-3 chain with powi (different powers: should not vectorize)
        ; Negative counterpart of @test3: the two powi calls receive different
        ; exponent values (%P and %P + 1).
     84 define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
     85 
        ; Step 1 of each chain: A - B.
     86 	%X1 = fsub double %A1, %B1
     87 	%X2 = fsub double %A2, %B2
        ; Second exponent, deliberately different from %P.
     88         %P2 = add i32 %P, 1
        ; Step 2 of each chain: powi with mismatched exponents (%P vs. %P2).
     89 	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
     90 	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
        ; Step 3 of each chain: Y + B.
     91 	%Z1 = fadd double %Y1, %B1
     92 	%Z2 = fadd double %Y2, %B2
        ; The two chains meet here in a single scalar multiply.
     93 	%R  = fmul double %Z1, %Z2
     94 	ret double %R
        ; Expected output: no <2 x double> type may appear anywhere between the
        ; @test4 match and the final `ret double %R`, i.e. nothing in this
        ; function is turned into a vector operation.
     95 ; CHECK: @test4
     96 ; CHECK-NOT: <2 x double>
     97 ; CHECK: ret double %R
     98 }
     99 
        ; After the function bodies, the output module is expected to declare the
        ; <2 x double> variants of the intrinsics called by the vectorized code in
        ; @test1, @test2 and @test3, printed with the attributes listed here.
    100 ; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    101 ; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
    102 ; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) nounwind readonly
    103 
    104