Home | History | Annotate | Download | only in AArch64
      1 ; RUN: opt -S -slp-vectorizer -slp-threshold=-18 -dce -instcombine < %s | FileCheck %s
      2 
      3 target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128" ; little-endian, ELF mangling, i32 aligned to 64 bits, i128 to 128 bits, native integer widths 32/64, 128-bit stack alignment
      4 target triple = "aarch64--linux-gnu"                      ; AArch64 Linux target
      5 
      6 ; These tests check that we remove from consideration pairs of seed
      7 ; getelementptrs when they are known to have a constant difference. Such pairs
      8 ; are likely not good candidates for vectorization since one can be computed
      9 ; from the other. We use an unprofitable threshold to force vectorization.
     10 ;
     11 ; int getelementptr(int *g, int n, int w, int x, int y, int z) {
     12 ;   int sum = 0;
     13 ;   for (int i = 0; i < n ; ++i) {
     14 ;     sum += g[2*i + w]; sum += g[2*i + x];
     15 ;     sum += g[2*i + y]; sum += g[2*i + z];
     16 ;   }
     17 ;   return sum;
     18 ; }
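     19 ; Note: the IR below hard-codes w as 0; @getelementptr_2x32 also hard-codes x as 1, so its first two indices differ by a constant.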
     20 
     21 ; CHECK-LABEL: @getelementptr_4x32
     22 ;
     23 ; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <4 x i32>
     24 ; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
     25 ; CHECK: sext i32 [[X]] to i64
     26 ;
     27 define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) { ; returns the sum of g[2*i+0], g[2*i+x], g[2*i+y], g[2*i+z] for i in [0, n); 0 when n <= 0
     28 entry:
     29   %cmp31 = icmp sgt i32 %n, 0                                      ; guard: only enter the loop when n > 0
     30   br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
     31 
     32 for.body.preheader:
     33   br label %for.body
     34 
     35 for.cond.cleanup.loopexit:
     36   br label %for.cond.cleanup
     37 
     38 for.cond.cleanup:
     39   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ] ; 0 if the loop was skipped, otherwise the final accumulated sum
     40   ret i32 %sum.0.lcssa
     41 ; Loop body: four loads whose indices are 2*i plus 0, x, y, z; the expectations above this function look for a <4 x i32> add feeding an extractelement and a sext.
     42 for.body:
     43   %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] ; i, the loop counter
     44   %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]              ; running sum carried across iterations
     45   %t4 = shl nsw i32 %indvars.iv, 1                                 ; t4 = 2*i, the base shared by all four indices
     46   %t5 = add nsw i32 %t4, 0                                         ; index 0: 2*i + 0
     47   %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
     48   %t6 = load i32, i32* %arrayidx, align 4
     49   %add1 = add nsw i32 %t6, %sum.032                                ; sum += g[2*i + 0]
     50   %t7 = add nsw i32 %t4, %x                                        ; index 1: 2*i + x
     51   %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
     52   %t8 = load i32, i32* %arrayidx5, align 4
     53   %add6 = add nsw i32 %add1, %t8                                   ; sum += g[2*i + x]
     54   %t9 = add nsw i32 %t4, %y                                        ; index 2: 2*i + y
     55   %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
     56   %t10 = load i32, i32* %arrayidx10, align 4
     57   %add11 = add nsw i32 %add6, %t10                                 ; sum += g[2*i + y]
     58   %t11 = add nsw i32 %t4, %z                                       ; index 3: 2*i + z
     59   %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
     60   %t12 = load i32, i32* %arrayidx15, align 4
     61   %add16 = add nsw i32 %add11, %t12                                ; sum += g[2*i + z]
     62   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1                ; ++i
     63   %exitcond = icmp eq i32 %indvars.iv.next , %n                    ; leave the loop once i + 1 == n
     64   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
     65 }
     66 
     67 ; CHECK-LABEL: @getelementptr_2x32
     68 ;
     69 ; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <2 x i32>
     70 ; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
     71 ; CHECK: sext i32 [[X]] to i64
     72 ;
     73 define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) { ; returns the sum of g[2*i+0], g[2*i+1], g[2*i+y], g[2*i+z] for i in [0, n); 0 when n <= 0; %x is not used
     74 entry:
     75   %cmp31 = icmp sgt i32 %n, 0                                      ; guard: only enter the loop when n > 0
     76   br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
     77 
     78 for.body.preheader:
     79   br label %for.body
     80 
     81 for.cond.cleanup.loopexit:
     82   br label %for.cond.cleanup
     83 
     84 for.cond.cleanup:
     85   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ] ; 0 if the loop was skipped, otherwise the final accumulated sum
     86   ret i32 %sum.0.lcssa
     87 ; Loop body: the first two indices (2*i+0, 2*i+1) differ by a constant; the expectations above this function look for a <2 x i32> add, presumably covering only the y/z pair.
     88 for.body:
     89   %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] ; i, the loop counter
     90   %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]              ; running sum carried across iterations
     91   %t4 = shl nsw i32 %indvars.iv, 1                                 ; t4 = 2*i, the base shared by all four indices
     92   %t5 = add nsw i32 %t4, 0                                         ; index 0: 2*i + 0
     93   %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
     94   %t6 = load i32, i32* %arrayidx, align 4
     95   %add1 = add nsw i32 %t6, %sum.032                                ; sum += g[2*i + 0]
     96   %t7 = add nsw i32 %t4, 1                                         ; index 1: 2*i + 1, a constant distance of 1 from index 0
     97   %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
     98   %t8 = load i32, i32* %arrayidx5, align 4
     99   %add6 = add nsw i32 %add1, %t8                                   ; sum += g[2*i + 1]
    100   %t9 = add nsw i32 %t4, %y                                        ; index 2: 2*i + y
    101   %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
    102   %t10 = load i32, i32* %arrayidx10, align 4
    103   %add11 = add nsw i32 %add6, %t10                                 ; sum += g[2*i + y]
    104   %t11 = add nsw i32 %t4, %z                                       ; index 3: 2*i + z
    105   %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
    106   %t12 = load i32, i32* %arrayidx15, align 4
    107   %add16 = add nsw i32 %add11, %t12                                ; sum += g[2*i + z]
    108   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1                ; ++i
    109   %exitcond = icmp eq i32 %indvars.iv.next , %n                    ; leave the loop once i + 1 == n
    110   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
    111 }
    112