; REQUIRES: asserts
; RUN: opt -S -loop-vectorize -debug-only=loop-vectorize -mcpu=skylake %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"

@src = common local_unnamed_addr global [120 x float] zeroinitializer, align 4
@dst = common local_unnamed_addr global [120 x float] zeroinitializer, align 4
      9 ; Function Attrs: norecurse nounwind
     10 define void @stride8(float %k, i32 %width_) {
     11 entry:
     12 
     13 ; CHECK: Found an estimated cost of 48 for VF 8 For instruction:   %0 = load float
     14 
     15   %cmp72 = icmp sgt i32 %width_, 0
     16   br i1 %cmp72, label %for.body.lr.ph, label %for.cond.cleanup
     17 
     18 for.body.lr.ph:                                   ; preds = %entry
     19   br label %for.body
     20 
     21 for.cond.cleanup.loopexit:                        ; preds = %for.body
     22   br label %for.cond.cleanup
     23 
     24 for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
     25   ret void
     26 
     27 for.body:                                         ; preds = %for.body.lr.ph, %for.body
     28   %i.073 = phi i32 [ 0, %for.body.lr.ph ], [ %add46, %for.body ]
     29   %arrayidx = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %i.073
     30   %0 = load float, float* %arrayidx, align 4
     31   %mul = fmul fast float %0, %k
     32   %arrayidx2 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %i.073
     33   %1 = load float, float* %arrayidx2, align 4
     34   %add3 = fadd fast float %1, %mul
     35   store float %add3, float* %arrayidx2, align 4
     36   %add4 = or i32 %i.073, 1
     37   %arrayidx5 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add4
     38   %2 = load float, float* %arrayidx5, align 4
     39   %mul6 = fmul fast float %2, %k
     40   %arrayidx8 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add4
     41   %3 = load float, float* %arrayidx8, align 4
     42   %add9 = fadd fast float %3, %mul6
     43   store float %add9, float* %arrayidx8, align 4
     44   %add10 = or i32 %i.073, 2
     45   %arrayidx11 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add10
     46   %4 = load float, float* %arrayidx11, align 4
     47   %mul12 = fmul fast float %4, %k
     48   %arrayidx14 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add10
     49   %5 = load float, float* %arrayidx14, align 4
     50   %add15 = fadd fast float %5, %mul12
     51   store float %add15, float* %arrayidx14, align 4
     52   %add16 = or i32 %i.073, 3
     53   %arrayidx17 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add16
     54   %6 = load float, float* %arrayidx17, align 4
     55   %mul18 = fmul fast float %6, %k
     56   %arrayidx20 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add16
     57   %7 = load float, float* %arrayidx20, align 4
     58   %add21 = fadd fast float %7, %mul18
     59   store float %add21, float* %arrayidx20, align 4
     60   %add22 = or i32 %i.073, 4
     61   %arrayidx23 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add22
     62   %8 = load float, float* %arrayidx23, align 4
     63   %mul24 = fmul fast float %8, %k
     64   %arrayidx26 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add22
     65   %9 = load float, float* %arrayidx26, align 4
     66   %add27 = fadd fast float %9, %mul24
     67   store float %add27, float* %arrayidx26, align 4
     68   %add28 = or i32 %i.073, 5
     69   %arrayidx29 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add28
     70   %10 = load float, float* %arrayidx29, align 4
     71   %mul30 = fmul fast float %10, %k
     72   %arrayidx32 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add28
     73   %11 = load float, float* %arrayidx32, align 4
     74   %add33 = fadd fast float %11, %mul30
     75   store float %add33, float* %arrayidx32, align 4
     76   %add34 = or i32 %i.073, 6
     77   %arrayidx35 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add34
     78   %12 = load float, float* %arrayidx35, align 4
     79   %mul36 = fmul fast float %12, %k
     80   %arrayidx38 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add34
     81   %13 = load float, float* %arrayidx38, align 4
     82   %add39 = fadd fast float %13, %mul36
     83   store float %add39, float* %arrayidx38, align 4
     84   %add40 = or i32 %i.073, 7
     85   %arrayidx41 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add40
     86   %14 = load float, float* %arrayidx41, align 4
     87   %mul42 = fmul fast float %14, %k
     88   %arrayidx44 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add40
     89   %15 = load float, float* %arrayidx44, align 4
     90   %add45 = fadd fast float %15, %mul42
     91   store float %add45, float* %arrayidx44, align 4
     92   %add46 = add nuw nsw i32 %i.073, 8
     93   %cmp = icmp slt i32 %add46, %width_
     94   br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
     95 }
     97 ; Function Attrs: norecurse nounwind
     98 define void @stride3(float %k, i32 %width_) {
     99 entry:
    100 
    101 ; CHECK: Found an estimated cost of 20 for VF 8 For instruction:   %0 = load float
    102 
    103   %cmp27 = icmp sgt i32 %width_, 0
    104   br i1 %cmp27, label %for.body.lr.ph, label %for.cond.cleanup
    105 
    106 for.body.lr.ph:                                   ; preds = %entry
    107   br label %for.body
    108 
    109 for.cond.cleanup:                                 ; preds = %for.body, %entry
    110   ret void
    111 
    112 for.body:                                         ; preds = %for.body.lr.ph, %for.body
    113   %i.028 = phi i32 [ 0, %for.body.lr.ph ], [ %add16, %for.body ]
    114   %arrayidx = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %i.028
    115   %0 = load float, float* %arrayidx, align 4
    116   %mul = fmul fast float %0, %k
    117   %arrayidx2 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %i.028
    118   %1 = load float, float* %arrayidx2, align 4
    119   %add3 = fadd fast float %1, %mul
    120   store float %add3, float* %arrayidx2, align 4
    121   %add4 = add nuw nsw i32 %i.028, 1
    122   %arrayidx5 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add4
    123   %2 = load float, float* %arrayidx5, align 4
    124   %mul6 = fmul fast float %2, %k
    125   %arrayidx8 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add4
    126   %3 = load float, float* %arrayidx8, align 4
    127   %add9 = fadd fast float %3, %mul6
    128   store float %add9, float* %arrayidx8, align 4
    129   %add10 = add nuw nsw i32 %i.028, 2
    130   %arrayidx11 = getelementptr inbounds [120 x float], [120 x float]* @src, i32 0, i32 %add10
    131   %4 = load float, float* %arrayidx11, align 4
    132   %mul12 = fmul fast float %4, %k
    133   %arrayidx14 = getelementptr inbounds [120 x float], [120 x float]* @dst, i32 0, i32 %add10
    134   %5 = load float, float* %arrayidx14, align 4
    135   %add15 = fadd fast float %5, %mul12
    136   store float %add15, float* %arrayidx14, align 4
    137   %add16 = add nuw nsw i32 %i.028, 3
    138   %cmp = icmp slt i32 %add16, %width_
    139   br i1 %cmp, label %for.body, label %for.cond.cleanup
    140 }