Home | History | Annotate | Download | only in AArch64
      1 ; RUN: opt -S -debug-only=loop-vectorize -loop-vectorize -instcombine < %s 2>&1 | FileCheck %s
      2 ; REQUIRES: asserts
      3 
      4 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
      5 target triple = "aarch64--linux-gnueabi"
      6 ; 1024-byte zero-initialized arrays: @test_byte_interleaved_cost loads from @AB and stores to @CD.
      7 @AB = common global [1024 x i8] zeroinitializer, align 4
      8 @CD = common global [1024 x i8] zeroinitializer, align 4
      9 
     10 define void @test_byte_interleaved_cost(i8 %C, i8 %D) {
     11 entry:
     12   br label %for.body
     13 ; The loop reads the byte pair @AB[i], @AB[i|1] and writes @CD[i], @CD[i|1], with i advancing by 2.
     14 ; 8xi8 and 16xi8 are valid i8 vector types, so the interleaved access group
     15 ; is expected to cost 2 at both VF 8 and VF 16.
     16 
     17 ; CHECK: LV: Checking a loop in "test_byte_interleaved_cost"
     18 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %tmp = load i8, i8* %arrayidx0, align 4
     19 ; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction:   %tmp = load i8, i8* %arrayidx0, align 4
     20 
     21 for.body:                                         ; preds = %for.body, %entry
     22   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i = 0, 2, 4, ...
     23   %arrayidx0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %indvars.iv
     24   %tmp = load i8, i8* %arrayidx0, align 4  ; first byte of the pair: @AB[i]
     25   %tmp1 = or i64 %indvars.iv, 1  ; i starts at 0 and steps by 2, so it is even and this equals i + 1
     26   %arrayidx1 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %tmp1
     27   %tmp2 = load i8, i8* %arrayidx1, align 4  ; second byte of the pair: @AB[i|1]
     28   %add = add nsw i8 %tmp, %C  ; first byte + %C
     29   %mul = mul nsw i8 %tmp2, %D  ; second byte * %D
     30   %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %indvars.iv
     31   store i8 %add, i8* %arrayidx2, align 4  ; @CD[i] = sum
     32   %arrayidx3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %tmp1
     33   store i8 %mul, i8* %arrayidx3, align 4  ; @CD[i|1] = product
     34   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
     35   %cmp = icmp slt i64 %indvars.iv.next, 1024  ; keep looping while i + 2 < 1024
     36   br i1 %cmp, label %for.body, label %for.end
     37 
     38 for.end:                                          ; preds = %for.body
     39   ret void
     40 }
     41 
     42 %ig.factor.8 = type { double*, double, double, double, double, double, double, double }  ; 8 fields: one pointer followed by seven doubles
     43 define double @wide_interleaved_group(%ig.factor.8* %s, double %a, double %b, i32 %n) {
     44 entry:
     45   br label %for.body
     46 ; Each iteration loads fields 2 and 6 of %s[%i] and stores to field 3 of the same element.
     47 ; Check the default cost of a strided load whose factor is greater than the
     48 ; maximum allowed. In this test, the interleave factor would be 8, which is
     49 ; not supported.
     50 
     51 ; CHECK: LV: Checking a loop in "wide_interleaved_group"
     52 ; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction:   %1 = load double, double* %0, align 8
     53 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %5 = load double, double* %4, align 8
     54 ; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction:   store double %9, double* %10, align 8
     55 
     56 for.body:
     57   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]  ; element index into %s
     58   %r = phi double [ 0.000000e+00, %entry ], [ %12, %for.body ]  ; running sum, returned on exit
     59   %0 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 2
     60   %1 = load double, double* %0, align 8  ; field 2 of %s[%i]
     61   %2 = fcmp fast olt double %1, %a
     62   %3 = select i1 %2, double 0.000000e+00, double %1  ; values below %a become 0.0
     63   %4 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 6
     64   %5 = load double, double* %4, align 8  ; field 6 of %s[%i]
     65   %6 = fcmp fast olt double %5, %a
     66   %7 = select i1 %6, double 0.000000e+00, double %5  ; same clamp as applied to field 2
     67   %8 = fmul fast double %7, %b
     68   %9 = fadd fast double %8, %3  ; clamped field 6 * %b + clamped field 2
     69   %10 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 3
     70   store double %9, double* %10, align 8  ; result is written to field 3 of %s[%i]
     71   %11 = fmul fast double %9, %9
     72   %12 = fadd fast double %11, %r  ; accumulate the square of the stored value
     73   %i.next = add nuw nsw i64 %i, 1
     74   %13 = trunc i64 %i.next to i32
     75   %cond = icmp eq i32 %13, %n  ; exit once the low 32 bits of i + 1 equal %n
     76   br i1 %cond, label %for.exit, label %for.body
     77 
     78 for.exit:
     79   %r.lcssa = phi double [ %12, %for.body ]
     80   ret double %r.lcssa  ; sum of squares accumulated over all iterations
     81 }
     82