; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev3-linux-gnu"

; We used to crash on this example because we were building a constant
; expression during vectorization and the vectorizer expects instructions
; as elements of the vectorized tree.
; CHECK-LABEL: @test
; PR19621

define void @test() {
bb279:
  br label %bb283

; Loop header: four float values carried around the bb283 -> ... -> exit ->
; bb283 cycle. Every initial value coming from bb279 is undef.
bb283:
  %Av.sroa.8.0 = phi float [ undef, %bb279 ], [ %tmp315, %exit ]
  %Av.sroa.5.0 = phi float [ undef, %bb279 ], [ %tmp319, %exit ]
  %Av.sroa.3.0 = phi float [ undef, %bb279 ], [ %tmp307, %exit ]
  %Av.sroa.0.0 = phi float [ undef, %bb279 ], [ %tmp317, %exit ]
  br label %bb284

; Two parallel scalar chains of the same shape (fpext, fsub, fsub), one fed by
; %Av.sroa.3.0 and one by %Av.sroa.8.0.
bb284:
  %tmp7.i = fpext float %Av.sroa.3.0 to double
  %tmp8.i = fsub double %tmp7.i, undef
  %tmp9.i = fsub double %tmp8.i, undef
  %tmp17.i = fpext float %Av.sroa.8.0 to double
  %tmp19.i = fsub double %tmp17.i, undef
  %tmp20.i = fsub double %tmp19.i, undef
  br label %bb21.i

bb21.i:
  br i1 undef, label %bb22.i, label %exit

bb22.i:
  %tmp24.i = fadd double undef, %tmp9.i
  %tmp26.i = fadd double undef, %tmp20.i
  br label %bb32.i

; Self-looping block: each phi takes the matching fadd result when entered from
; bb22.i and the constant 0.0 on the back-edge from bb32.i itself.
bb32.i:
  %xs.0.i = phi double [ %tmp24.i, %bb22.i ], [ 0.000000e+00, %bb32.i ]
  %ys.0.i = phi double [ %tmp26.i, %bb22.i ], [ 0.000000e+00, %bb32.i ]
  br i1 undef, label %bb32.i, label %bb21.i

; Computes the values fed back into the phis of bb283. Note that the first
; fmul multiplies by undef while the second multiplies by the constant 0.0,
; and that %tmp317 / %tmp319 truncate undef directly.
exit:
  %tmp303 = fpext float %Av.sroa.0.0 to double
  %tmp304 = fmul double %tmp303, undef
  %tmp305 = fadd double undef, %tmp304
  %tmp306 = fadd double %tmp305, undef
  %tmp307 = fptrunc double %tmp306 to float
  %tmp311 = fpext float %Av.sroa.5.0 to double
  %tmp312 = fmul double %tmp311, 0.000000e+00
  %tmp313 = fadd double undef, %tmp312
  %tmp314 = fadd double %tmp313, undef
  %tmp315 = fptrunc double %tmp314 to float
  %tmp317 = fptrunc double undef to float
  %tmp319 = fptrunc double undef to float
  br label %bb283
}

; Make sure that we properly handle constant-folded vectorized trees. The
; vectorizer starts at the pair (%t2, %t3) and will constant fold the tree.
; The code that handles insertelement instructions must handle this.
define <4 x double> @constant_folding() {
entry:
  %t0 = fadd double 1.000000e+00, 0.000000e+00
  %t1 = fadd double 1.000000e+00, 1.000000e+00
  %t2 = fmul double %t0, 1.000000e+00
  %i1 = insertelement <4 x double> undef, double %t2, i32 1
  %t3 = fmul double %t1, 1.000000e+00
  %i2 = insertelement <4 x double> %i1, double %t3, i32 0
  ret <4 x double> %i2
}

; CHECK-LABEL: @constant_folding
; CHECK: %[[V0:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 0
; CHECK: %[[V1:.+]] = insertelement <4 x double> undef, double %[[V0]], i32 1
; CHECK: %[[V2:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 1
; CHECK: %[[V3:.+]] = insertelement <4 x double> %[[V1]], double %[[V2]], i32 0
; CHECK: ret <4 x double> %[[V3]]