Home | History | Annotate | Download | only in LoopVectorize
      1 ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      4 target triple = "x86_64-apple-macosx10.9.0"
      5 
      6 %struct.coordinate = type { i32, i32 } ; two i32 fields (x, y): 8 bytes per element under the datalayout above
      7 
      8 ; Make sure that we don't generate a wide load when accessing the struct.
      9 ; struct coordinate {
     10 ;  int x;
     11 ;  int y;
     12 ; };
     13 ;
     14 ;
     15 ; int foo(struct coordinate *A, int n) {
     16 ;
     17 ;   int sum = 0;
     18 ;   for (int i = 0; i < n; ++i)
     19 ;     sum += A[i].x;
     20 ;
     21 ;   return sum;
     22 ; }
     23 
     24 ;CHECK-LABEL: @foo(
     25 ;CHECK-NOT: load <4 x i32>
     26 ;CHECK: ret
     27 define i32 @foo(%struct.coordinate* nocapture %A, i32 %n) nounwind uwtable readonly ssp { ; returns the sum of field 0 of A[0..n-1]; 0 when n <= 0
     28 entry:
     29   %cmp4 = icmp sgt i32 %n, 0 ; signed test: n > 0
     30   br i1 %cmp4, label %for.body, label %for.end ; bypass the loop entirely when n <= 0
     31 
     32 for.body:                                         ; preds = %entry, %for.body
     33   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 index: 0 from entry, index + 1 from the back edge
     34   %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] ; running sum, 0 on the first iteration
     35   %x = getelementptr inbounds %struct.coordinate* %A, i64 %indvars.iv, i32 0 ; address of field 0 of A[index]; elements are { i32, i32 }, so successive addresses are 8 bytes apart
     36   %0 = load i32* %x, align 4 ; scalar i32 load of that field
     37   %add = add nsw i32 %0, %sum.05 ; sum += loaded value
     38   %indvars.iv.next = add i64 %indvars.iv, 1 ; index + 1
     39   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the i64 index so it can be compared with the i32 bound
     40   %exitcond = icmp eq i32 %lftr.wideiv, %n ; true once the truncated next index equals n
     41   br i1 %exitcond, label %for.end, label %for.body ; leave the loop, otherwise take the back edge
     42 
     43 for.end:                                          ; preds = %for.body, %entry
     44   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ; 0 if the loop was skipped, otherwise the final sum
     45   ret i32 %sum.0.lcssa
     46 }
     47 
     48 %struct.lit = type { i32 } ; single i32 field: the struct is exactly as large as the element loaded from it
     49 
     50 ; Verify that we still vectorize the access if the struct has the same size as
     51 ; the loaded element.
     52 ; struct lit {
     53 ;  int x;
     54 ; };
     55 ;
     56 ;
     57 ; int bar(struct lit *A, int n) {
     58 ;
     59 ;   int sum = 0;
     60 ;   for (int i = 0; i < n; ++i)
     61 ;     sum += A[i].x;
     62 ;
     63 ;   return sum;
     64 ; }
     65 
     66 ;CHECK-LABEL: @bar(
     67 ;CHECK: load <4 x i32>
     68 ;CHECK: ret
     69 define i32 @bar(%struct.lit* nocapture %A, i32 %n) nounwind uwtable readonly ssp { ; returns the sum of field 0 of A[0..n-1]; 0 when n <= 0
     70 entry:
     71   %cmp4 = icmp sgt i32 %n, 0 ; signed test: n > 0
     72   br i1 %cmp4, label %for.body, label %for.end ; bypass the loop entirely when n <= 0
     73 
     74 for.body:                                         ; preds = %entry, %for.body
     75   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 index: 0 from entry, index + 1 from the back edge
     76   %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] ; running sum, 0 on the first iteration
     77   %x = getelementptr inbounds %struct.lit* %A, i64 %indvars.iv, i32 0 ; address of field 0 of A[index]; elements are { i32 }, so successive addresses are adjacent i32 slots
     78   %0 = load i32* %x, align 4 ; scalar i32 load of that field
     79   %add = add nsw i32 %0, %sum.05 ; sum += loaded value
     80   %indvars.iv.next = add i64 %indvars.iv, 1 ; index + 1
     81   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the i64 index so it can be compared with the i32 bound
     82   %exitcond = icmp eq i32 %lftr.wideiv, %n ; true once the truncated next index equals n
     83   br i1 %exitcond, label %for.end, label %for.body ; leave the loop, otherwise take the back edge
     84 
     85 for.end:                                          ; preds = %for.body, %entry
     86   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ; 0 if the loop was skipped, otherwise the final sum
     87   ret i32 %sum.0.lcssa
     88 }
     89