; RUN: opt -S -slp-vectorizer -slp-threshold=-18 -dce -instcombine < %s | FileCheck %s

target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These tests check that we remove from consideration pairs of seed
; getelementptrs when they are known to have a constant difference. Such pairs
; are likely not good candidates for vectorization since one can be computed
; from the other. We use an unprofitable threshold to force vectorization.
;
; int getelementptr(int *g, int n, int w, int x, int y, int z) {
;   int sum = 0;
;   for (int i = 0; i < n ; ++i) {
;     sum += g[2*i + w]; sum += g[2*i + x];
;     sum += g[2*i + y]; sum += g[2*i + z];
;   }
;   return sum;
; }
;
; Note: the C sketch above is only a template. Neither IR function below has a
; parameter corresponding to 'w'; both use the constant 0 for that offset, and
; @getelementptr_2x32 additionally uses the constant 1 in place of 'x'.

; Case 1: offsets are 0, %x, %y and %z. The expected output contains a
; <4 x i32> add whose extracted lane is sign-extended to i64, i.e. the index
; computation is vectorized four wide (presumably because no two of the four
; indices differ by a known constant).

; CHECK-LABEL: @getelementptr_4x32
;
; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <4 x i32>
; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
; CHECK: sext i32 [[X]] to i64
;
define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
entry:
  ; Skip the loop entirely when %n <= 0; the result is then 0.
  %cmp31 = icmp sgt i32 %n, 0
  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  ; 0 when the loop was never entered, otherwise the final running sum.
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  ; %t4 = 2*i, the common base of all four indices.
  %t4 = shl nsw i32 %indvars.iv, 1
  ; sum += g[2*i + 0]
  %t5 = add nsw i32 %t4, 0
  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
  %t6 = load i32, i32* %arrayidx, align 4
  %add1 = add nsw i32 %t6, %sum.032
  ; sum += g[2*i + x]
  %t7 = add nsw i32 %t4, %x
  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
  %t8 = load i32, i32* %arrayidx5, align 4
  %add6 = add nsw i32 %add1, %t8
  ; sum += g[2*i + y]
  %t9 = add nsw i32 %t4, %y
  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
  %t10 = load i32, i32* %arrayidx10, align 4
  %add11 = add nsw i32 %add6, %t10
  ; sum += g[2*i + z]
  %t11 = add nsw i32 %t4, %z
  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
  %t12 = load i32, i32* %arrayidx15, align 4
  %add16 = add nsw i32 %add11, %t12
  ; ++i; leave the loop once i == n.
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next , %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; Case 2: offsets are 0, 1, %y and %z (the %x parameter is unused here). The
; indices 2*i + 0 and 2*i + 1 differ by the constant 1, and the expected
; output contains only a <2 x i32> add. Presumably the constant-difference
; pair is dropped from the seeds so that only two indices are vectorized;
; which two is not pinned down by the directives below.

; CHECK-LABEL: @getelementptr_2x32
;
; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <2 x i32>
; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
; CHECK: sext i32 [[X]] to i64
;
define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
entry:
  ; Skip the loop entirely when %n <= 0; the result is then 0.
  %cmp31 = icmp sgt i32 %n, 0
  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  ; 0 when the loop was never entered, otherwise the final running sum.
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  ; %t4 = 2*i, the common base of all four indices.
  %t4 = shl nsw i32 %indvars.iv, 1
  ; sum += g[2*i + 0]
  %t5 = add nsw i32 %t4, 0
  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
  %t6 = load i32, i32* %arrayidx, align 4
  %add1 = add nsw i32 %t6, %sum.032
  ; sum += g[2*i + 1] -- constant offset, exactly one element past %t5.
  %t7 = add nsw i32 %t4, 1
  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
  %t8 = load i32, i32* %arrayidx5, align 4
  %add6 = add nsw i32 %add1, %t8
  ; sum += g[2*i + y]
  %t9 = add nsw i32 %t4, %y
  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
  %t10 = load i32, i32* %arrayidx10, align 4
  %add11 = add nsw i32 %add6, %t10
  ; sum += g[2*i + z]
  %t11 = add nsw i32 %t4, %z
  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
  %t12 = load i32, i32* %arrayidx15, align 4
  %add16 = add nsw i32 %add11, %t12
  ; ++i; leave the loop once i == n.
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next , %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}