Home | History | Annotate | Download | only in X86
      1 ; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s
      2 
      3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; little-endian, ELF mangling, i64 aligned to 64 bits, x86_fp80 aligned to 128 bits, native integer widths 8/16/32/64, 128-bit stack alignment
      4 target triple = "x86_64-grtev3-linux-gnu" ; same triple as the -mtriple option on the RUN line
      5 
      6 ; We used to crash on this example because we were building a constant
      7 ; expression during vectorization and the vectorizer expects instructions
      8 ; as elements of the vectorized tree.
      9 ; CHECK-LABEL: @test
     10 ; PR19621
     11 
     12 define void @test() { ; PR19621 crash reproducer; takes no arguments and contains no ret (every path loops back to %bb283)
     13 bb279: ; entry block
     14   br label %bb283
     15 
     16 bb283: ; outer loop header: each phi is undef on entry and takes a value computed in %exit on the back edge
     17   %Av.sroa.8.0 = phi float [ undef, %bb279 ], [ %tmp315, %exit ]
     18   %Av.sroa.5.0 = phi float [ undef, %bb279 ], [ %tmp319, %exit ]
     19   %Av.sroa.3.0 = phi float [ undef, %bb279 ], [ %tmp307, %exit ]
     20   %Av.sroa.0.0 = phi float [ undef, %bb279 ], [ %tmp317, %exit ]
     21   br label %bb284
     22 
     23 bb284: ; two chains of identical shape: fpext float -> double followed by two fsub with undef
     24   %tmp7.i = fpext float %Av.sroa.3.0 to double ; first chain starts from %Av.sroa.3.0
     25   %tmp8.i = fsub double %tmp7.i, undef
     26   %tmp9.i = fsub double %tmp8.i, undef
     27   %tmp17.i = fpext float %Av.sroa.8.0 to double ; second chain starts from %Av.sroa.8.0
     28   %tmp19.i = fsub double %tmp17.i, undef
     29   %tmp20.i = fsub double %tmp19.i, undef
     30   br label %bb21.i
     31 
     32 bb21.i: ; middle loop header: reached from %bb284 and from %bb32.i
     33   br i1 undef, label %bb22.i, label %exit ; undef condition, so either successor may be taken
     34 
     35 bb22.i: ; consumes the results of both chains from %bb284
     36   %tmp24.i = fadd double undef, %tmp9.i
     37   %tmp26.i = fadd double undef, %tmp20.i
     38   br label %bb32.i
     39 
     40 bb32.i: ; self-loop: the phis take the fadd results when entered from %bb22.i and 0.0 on the self edge
     41   %xs.0.i = phi double [ %tmp24.i, %bb22.i ], [ 0.000000e+00, %bb32.i ] ; not used by any other instruction in this function
     42   %ys.0.i = phi double [ %tmp26.i, %bb22.i ], [ 0.000000e+00, %bb32.i ] ; not used by any other instruction in this function
     43   br i1 undef, label %bb32.i, label %bb21.i
     44 
     45 exit: ; computes the four values that feed the %bb283 phis on the back edge
     46   %tmp303 = fpext float %Av.sroa.0.0 to double ; chain: fpext -> fmul -> fadd -> fadd -> fptrunc
     47   %tmp304 = fmul double %tmp303, undef
     48   %tmp305 = fadd double undef, %tmp304
     49   %tmp306 = fadd double %tmp305, undef
     50   %tmp307 = fptrunc double %tmp306 to float ; feeds %Av.sroa.3.0
     51   %tmp311 = fpext float %Av.sroa.5.0 to double ; same chain shape as above, starting from %Av.sroa.5.0
     52   %tmp312 = fmul double %tmp311, 0.000000e+00 ; multiplies by constant 0.0 where the chain above uses undef
     53   %tmp313 = fadd double undef, %tmp312
     54   %tmp314 = fadd double %tmp313, undef
     55   %tmp315 = fptrunc double %tmp314 to float ; feeds %Av.sroa.8.0
     56   %tmp317 = fptrunc double undef to float ; feeds %Av.sroa.0.0
     57   %tmp319 = fptrunc double undef to float ; feeds %Av.sroa.5.0
     58   br label %bb283 ; back edge of the outer loop
     59 }
     60 
     61 ; Make sure that we properly handle constant folded vectorized trees. The
     62 ; vectorizer starts at the tuple (%t2, %t3) and will constant fold the tree.
     63 ; The code that handles insertelement instructions must handle this.
     64 define <4 x double> @constant_folding() { ; builds a <4 x double> whose lanes 0 and 1 come from all-constant scalar arithmetic
     65 entry:
     66   %t0 = fadd double 1.000000e+00 , 0.000000e+00 ; both operands are constants (1.0 + 0.0)
     67   %t1 = fadd double 1.000000e+00 , 1.000000e+00 ; both operands are constants (1.0 + 1.0)
     68   %t2 = fmul double %t0, 1.000000e+00
     69   %i1 = insertelement <4 x double> undef, double %t2, i32 1 ; %t2 goes into lane 1; the base vector is undef
     70   %t3 = fmul double %t1, 1.000000e+00
     71   %i2 = insertelement <4 x double> %i1, double %t3, i32 0 ; %t3 goes into lane 0; lanes 2 and 3 are never written
     72   ret <4 x double> %i2
     73 }
     74 
     75 ; CHECK-LABEL: @constant_folding
     76 ; CHECK: %[[V0:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 0
     77 ; CHECK: %[[V1:.+]] = insertelement <4 x double> undef, double %[[V0]], i32 1
     78 ; CHECK: %[[V2:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 1
     79 ; CHECK: %[[V3:.+]] = insertelement <4 x double> %[[V1]], double %[[V2]], i32 0
     80 ; CHECK: ret <4 x double> %[[V3]]
     81