Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      4 target triple = "x86_64-unknown-linux-gnu"
      5 
      6 ; A tricky loop: the first store's index, a[i], is itself loaded from the array being written.
      7 ;
      8 ; void loop(int *a, int *b) {
      9 ;    for (int i = 0; i < 512; ++i) {
     10 ;        a[a[i]] = b[i];
     11 ;        a[i] = b[i+1];
     12 ;    }
     13 ;}
     14 
     15 ;CHECK-LABEL: @loop(
     16 ;CHECK-NOT: <4 x i32>
     17 define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {  ; IR form of the C loop above, with no parallel-loop metadata; the test expects no 4-wide i32 vectors in the output
     18 entry:
     19   br label %for.body
     20 
     21 for.body:                                         ; preds = %for.body, %entry
     22   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 on entry, i+1 from the back edge
     23   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
     24   %0 = load i32, i32* %arrayidx, align 4  ; b[i]
     25   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
     26   %1 = load i32, i32* %arrayidx2, align 4  ; a[i], used below as an index into a
     27   %idxprom3 = sext i32 %1 to i64
     28   %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
     29   store i32 %0, i32* %arrayidx4, align 4  ; a[a[i]] = b[i]
     30   %indvars.iv.next = add i64 %indvars.iv, 1
     31   %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
     32   %2 = load i32, i32* %arrayidx6, align 4  ; b[i+1]
     33   store i32 %2, i32* %arrayidx2, align 4  ; a[i] = b[i+1]
     34   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     35   %exitcond = icmp eq i32 %lftr.wideiv, 512  ; exit once i+1 reaches 512
     36   br i1 %exitcond, label %for.end, label %for.body
     37 
     38 for.end:                                          ; preds = %for.body
     39   ret void
     40 }
     41 
     42 ; The same loop with parallel loop metadata added to the loop branch
     43 ; and the memory instructions.
     44 
     45 ;CHECK-LABEL: @parallel_loop(
     46 ;CHECK: <4 x i32>
     47 define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {  ; same instructions as @loop, but every load/store is tagged as a parallel-loop access; the test expects 4-wide i32 vectors in the output
     48 entry:
     49   br label %for.body
     50 
     51 for.body:                                         ; preds = %for.body, %entry
     52   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 on entry, i+1 from the back edge
     53   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
     54   %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3  ; b[i]
     55   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
     56   %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3  ; a[i], used below as an index into a
     57   %idxprom3 = sext i32 %1 to i64
     58   %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
     59   ; This store might have originated from inlining a function with a parallel
     60   ; loop. Its metadata (!5) is a list naming both this loop (!3) and the "original loop reference" (!4).
     61   store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !5  ; a[a[i]] = b[i]
     62   %indvars.iv.next = add i64 %indvars.iv, 1
     63   %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
     64   %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3  ; b[i+1]
     65   store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3  ; a[i] = b[i+1]
     66   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     67   %exitcond = icmp eq i32 %lftr.wideiv, 512  ; exit once i+1 reaches 512
     68   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3  ; !3 is this loop's identifier
     69 
     70 for.end:                                          ; preds = %for.body
     71   ret void
     72 }
     73 
     74 ; The same loop with an illegal parallel loop metadata: the memory
     75 ; accesses refer to a different loop's identifier.
     76 
     77 ;CHECK-LABEL: @mixed_metadata(
     78 ;CHECK-NOT: <4 x i32>
     79 
     80 define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {  ; same instructions as @loop; all accesses are tagged with this loop's identifier (!6) except one store tagged !7; the test expects no 4-wide i32 vectors in the output
     81 entry:
     82   br label %for.body
     83 
     84 for.body:                                         ; preds = %for.body, %entry
     85   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i: 0 on entry, i+1 from the back edge
     86   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
     87   %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6  ; b[i]
     88   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
     89   %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6  ; a[i], used below as an index into a
     90   %idxprom3 = sext i32 %1 to i64
     91   %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
     92   ; This refers to the loop marked with !7, which is not the loop we are in (!6).
     93   ; It should prevent this loop from being detected as a parallel loop.
     94   store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !7  ; a[a[i]] = b[i]
     95   %indvars.iv.next = add i64 %indvars.iv, 1
     96   %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
     97   %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6  ; b[i+1]
     98   store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6  ; a[i] = b[i+1]
     99   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    100   %exitcond = icmp eq i32 %lftr.wideiv, 512  ; exit once i+1 reaches 512
    101   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6  ; !6 is this loop's identifier
    102 
    103 for.end:                                          ; preds = %for.body
    104   ret void
    105 }
    106 
    107 !3 = !{!3}  ; self-referential node; attached as !llvm.loop to the back edge in @parallel_loop
    108 !4 = !{!4}  ; self-referential node; only referenced from the !5 list
    109 !5 = !{!3, !4}  ; list naming both !3 and !4; used by the a[a[i]] store in @parallel_loop
    110 !6 = !{!6}  ; self-referential node; attached as !llvm.loop to the back edge in @mixed_metadata
    111 !7 = !{!7}  ; self-referential node; referenced by one store in @mixed_metadata; no branch visible here uses it as !llvm.loop
    112