Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
      2 
        ; Module-level target description that applies to every function in this file.
        ; The layout string starts with "e" (little-endian) and "p:32:32:32" (32-bit
        ; pointers with 32-bit ABI and preferred alignment).
      3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
        ; NOTE(review): the opt invocation on the first line passes
        ; -mtriple=i386-apple-macosx10.8.0 while the triple below names macosx10.9.0.
        ; Presumably the command-line value takes precedence - confirm, and consider
        ; making the two agree.
      4 target triple = "i386-apple-macosx10.9.0"
      5 
      6 ;int foo(double *A, int k) {
      7 ;  double A0;
      8 ;  double A1;
      9 ;  if (k) {
     10 ;    A0 = 3;
     11 ;    A1 = 5;
     12 ;  } else {
     13 ;    A0 = A[10];
     14 ;    A1 = A[11];
     15 ;  }
     16 ;  A[0] = A0;
     17 ;  A[1] = A1;
     18 ;}
     19 
     20 
     21 ;CHECK: i32 @foo
     22 ;CHECK: load <2 x double>
     23 ;CHECK: phi <2 x double>
     24 ;CHECK: store <2 x double>
     25 ;CHECK: ret i32 undef
     26 define i32 @foo(double* nocapture %A, i32 %k) {
        ; Two-way branch that merges a pair of doubles and writes them to adjacent slots.
        ;   %A - base pointer; elements 10 and 11 are read, elements 0 and 1 are written.
        ;   %k - selector: zero takes the loaded pair (A[10], A[11]); non-zero takes the
        ;        constant pair (3.0, 5.0).
        ; Returns undef.
        ; The FileCheck lines ahead of this definition expect the two loads, the two phis
        ; and the two stores to each show up as a single <2 x double> operation.
     27 entry:
     28   %tobool = icmp eq i32 %k, 0  ; true when k == 0
     29   br i1 %tobool, label %if.else, label %if.end  ; k == 0 takes the load path; otherwise go straight to the merge
     30 
     31 if.else:                                          ; preds = %entry
     32   %arrayidx = getelementptr inbounds double, double* %A, i64 10
     33   %0 = load double, double* %arrayidx, align 8  ; A[10]
     34   %arrayidx1 = getelementptr inbounds double, double* %A, i64 11
     35   %1 = load double, double* %arrayidx1, align 8  ; A[11], the element right after the previous load
     36   br label %if.end
     37 
     38 if.end:                                           ; preds = %entry, %if.else
     39   %A0.0 = phi double [ %0, %if.else ], [ 3.000000e+00, %entry ]  ; first of the pair: A[10] or 3.0
     40   %A1.0 = phi double [ %1, %if.else ], [ 5.000000e+00, %entry ]  ; second of the pair: A[11] or 5.0
     41   store double %A0.0, double* %A, align 8  ; A[0]
     42   %arrayidx3 = getelementptr inbounds double, double* %A, i64 1
     43   store double %A1.0, double* %arrayidx3, align 8  ; A[1]
     44   ret i32 undef
     45 }
     46 
     47 
     48 ;int foo2(double * restrict B,  double * restrict A, int n, int m) {
     49 ;  double R=A[1];
     50 ;  double G=A[0];
     51 ;  for (int i=0; i < 100; i++) {
     52 ;    R += 10;
     53 ;    G += 10;
     54 ;    R *= 4;
     55 ;    G *= 4;
     56 ;    R += 4;
     57 ;    G += 4;
     58 ;  }
     59 ;  B[0] = G;
     60 ;  B[1] = R;
     61 ;  return 0;
     62 ;}
     63 
     64 ;CHECK: foo2
     65 ;CHECK: load <2 x double>
     66 ;CHECK: phi <2 x double>
     67 ;CHECK: fmul <2 x double>
     68 ;CHECK: store <2 x double>
     69 ;CHECK: ret
     70 define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) #0 {
        ; Runs two independent, identical recurrences x = (x + 10) * 4 + 4 for 100
        ; iterations and stores the results to adjacent slots of %B.
        ;   %B - output pointer; B[0] receives the chain seeded from A[0], B[1] the chain
        ;        seeded from A[1].
        ;   %A - input pointer; only A[0] and A[1] are read.
        ;   %n, %m - not referenced anywhere in the body.
        ; Returns 0.
        ; NOTE(review): attribute group #0 has no definition in the visible part of this
        ; file - confirm that is intentional.
        ; The FileCheck lines ahead of this definition expect <2 x double> forms of the
        ; load, the phi, an fmul and the store.
     71 entry:
     72   %arrayidx = getelementptr inbounds double, double* %A, i64 1
     73   %0 = load double, double* %arrayidx, align 8  ; A[1], seed for R
     74   %1 = load double, double* %A, align 8  ; A[0], seed for G
     75   br label %for.body
     76 
     77 for.body:                                         ; preds = %for.body, %entry
     78   %i.019 = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; loop counter, starts at 0
     79   %G.018 = phi double [ %1, %entry ], [ %add5, %for.body ]  ; G recurrence
     80   %R.017 = phi double [ %0, %entry ], [ %add4, %for.body ]  ; R recurrence
     81   %add = fadd double %R.017, 1.000000e+01  ; R + 10
     82   %add2 = fadd double %G.018, 1.000000e+01  ; G + 10
     83   %mul = fmul double %add, 4.000000e+00  ; (R + 10) * 4
     84   %mul3 = fmul double %add2, 4.000000e+00  ; (G + 10) * 4
     85   %add4 = fadd double %mul, 4.000000e+00  ; next R
     86   %add5 = fadd double %mul3, 4.000000e+00  ; next G
     87   %inc = add nsw i32 %i.019, 1
     88   %exitcond = icmp eq i32 %inc, 100  ; leave once the incremented counter reaches 100
     89   br i1 %exitcond, label %for.end, label %for.body
     90 
     91 for.end:                                          ; preds = %for.body
     92   store double %add5, double* %B, align 8  ; B[0] = final G
     93   %arrayidx7 = getelementptr inbounds double, double* %B, i64 1
     94   store double %add4, double* %arrayidx7, align 8  ; B[1] = final R
     95   ret i32 0
     96 }
     97 
     98 ; float foo3(float *A) {
     99 ;
    100 ;   float R = A[0];
    101 ;   float G = A[1];
    102 ;   float B = A[2];
    103 ;   float Y = A[3];
    104 ;   float P = A[4];
    105 ;   for (int i=0; i < 121; i+=3) {
    106 ;     R+=A[i+0]*7;
    107 ;     G+=A[i+1]*8;
    108 ;     B+=A[i+2]*9;
    109 ;     Y+=A[i+3]*10;
    110 ;     P+=A[i+4]*11;
    111 ;   }
    112 ;
    113 ;   return R+G+B+Y+P;
    114 ; }
    115 
    116 ;CHECK: foo3
    117 ;CHECK: phi <4 x float>
    118 ;CHECK: fmul <4 x float>
    119 ;CHECK: fadd <4 x float>
    120 ;CHECK-NOT: phi <5 x float>
    121 ;CHECK-NOT: fmul <5 x float>
    122 ;CHECK-NOT: fadd <5 x float>
    123 
    124 define float @foo3(float* nocapture readonly %A) #0 {
        ; Five float accumulators (R, G, B, Y, P) seeded from A[0..4]; each iteration adds
        ; A[i+0]*7, A[i+1]*8, A[i+2]*9, A[i+3]*10 and A[i+4]*11 to them respectively, with
        ; the index advancing by 3 per iteration.
        ;   %A - input pointer; only read.
        ; Returns the five accumulators summed in the order R, G, B, Y, P.
        ; NOTE(review): attribute group #0 has no definition in the visible part of this
        ; file - confirm that is intentional.
        ; The FileCheck lines ahead of this definition expect <4 x float> phi/fmul/fadd and
        ; forbid <5 x float> forms of the same operations.
    125 entry:
    126   %0 = load float, float* %A, align 4  ; A[0]
    127   %arrayidx1 = getelementptr inbounds float, float* %A, i64 1
    128   %1 = load float, float* %arrayidx1, align 4  ; A[1]
    129   %arrayidx2 = getelementptr inbounds float, float* %A, i64 2
    130   %2 = load float, float* %arrayidx2, align 4  ; A[2]
    131   %arrayidx3 = getelementptr inbounds float, float* %A, i64 3
    132   %3 = load float, float* %arrayidx3, align 4  ; A[3]
    133   %arrayidx4 = getelementptr inbounds float, float* %A, i64 4
    134   %4 = load float, float* %arrayidx4, align 4  ; A[4]
    135   br label %for.body
    136 
    137 for.body:                                         ; preds = %for.body, %entry
    138   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; index i, advances by 3
    139   %P.056 = phi float [ %4, %entry ], [ %add26, %for.body ]  ; accumulator P
    140   %Y.055 = phi float [ %3, %entry ], [ %add21, %for.body ]  ; accumulator Y
    141   %B.054 = phi float [ %2, %entry ], [ %add16, %for.body ]  ; accumulator B
    142   %G.053 = phi float [ %1, %entry ], [ %add11, %for.body ]  ; accumulator G
    143   %R.052 = phi float [ %0, %entry ], [ %add6, %for.body ]  ; accumulator R
    144   %5 = phi float [ %1, %entry ], [ %11, %for.body ]  ; A[i+1]: carried over from the previous iteration's A[i+4] load
    145   %6 = phi float [ %0, %entry ], [ %9, %for.body ]  ; A[i+0]: carried over from the previous iteration's A[i+3] load
    146   %mul = fmul float %6, 7.000000e+00
    147   %add6 = fadd float %R.052, %mul  ; R += A[i+0] * 7
    148   %mul10 = fmul float %5, 8.000000e+00
    149   %add11 = fadd float %G.053, %mul10  ; G += A[i+1] * 8
    150   %7 = add nsw i64 %indvars.iv, 2
    151   %arrayidx14 = getelementptr inbounds float, float* %A, i64 %7
    152   %8 = load float, float* %arrayidx14, align 4  ; A[i+2]
    153   %mul15 = fmul float %8, 9.000000e+00
    154   %add16 = fadd float %B.054, %mul15  ; B += A[i+2] * 9
    155   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3  ; i + 3, used both as the next index and as this iteration's A[i+3] address
    156   %arrayidx19 = getelementptr inbounds float, float* %A, i64 %indvars.iv.next
    157   %9 = load float, float* %arrayidx19, align 4  ; A[i+3]
    158   %mul20 = fmul float %9, 1.000000e+01
    159   %add21 = fadd float %Y.055, %mul20  ; Y += A[i+3] * 10
    160   %10 = add nsw i64 %indvars.iv, 4
    161   %arrayidx24 = getelementptr inbounds float, float* %A, i64 %10
    162   %11 = load float, float* %arrayidx24, align 4  ; A[i+4]
    163   %mul25 = fmul float %11, 1.100000e+01
    164   %add26 = fadd float %P.056, %mul25  ; P += A[i+4] * 11
    165   %12 = trunc i64 %indvars.iv.next to i32
    166   %cmp = icmp slt i32 %12, 121  ; keep looping while the next index is below 121 (signed compare)
    167   br i1 %cmp, label %for.body, label %for.end
    168 
    169 for.end:                                          ; preds = %for.body
    170   %add28 = fadd float %add6, %add11  ; R + G
    171   %add29 = fadd float %add28, %add16  ; + B
    172   %add30 = fadd float %add29, %add21  ; + Y
    173   %add31 = fadd float %add30, %add26  ; + P
    174   ret float %add31
    175 }
    176 
    177 ; Make sure that a phi node of a different type placed among phi nodes of the
    178 ; same type does not prevent vectorization of the same-typed phi nodes.
    179 ; CHECK-LABEL: sort_phi_type
    180 ; CHECK: phi <4 x float>
    181 ; CHECK: fmul <4 x float>
    182 
    183 define float @sort_phi_type(float* nocapture readonly %A) {
        ; Loop with four float recurrences (Y, B, G, R), each multiplied by a constant per
        ; iteration, where the i64 induction phi sits between the float phis in the block.
        ;   %A - not referenced anywhere in the body.
        ; Returns mul10 + mul15 + mul20 + mul25 from the final iteration.
    184 entry:
    185   br label %for.body
    186 
    187 for.body:                                         ; preds = %for.body, %entry
    188   %Y = phi float [ 1.000000e+01, %entry ], [ %mul10, %for.body ]
    189   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; the one non-float phi, placed between the float phis
    190   %B = phi float [ 1.000000e+01, %entry ], [ %mul15, %for.body ]
    191   %G = phi float [ 1.000000e+01, %entry ], [ %mul20, %for.body ]  ; back-edge value is computed from %R
    192   %R = phi float [ 1.000000e+01, %entry ], [ %mul25, %for.body ]  ; back-edge value is computed from %G
    193   %mul10 = fmul float %Y, 8.000000e+00
    194   %mul15 = fmul float %B, 9.000000e+00
    195   %mul20 = fmul float %R, 10.000000e+01  ; constant is 100.0
    196   %mul25 = fmul float %G, 11.100000e+01  ; constant is 111.0
    197   %indvars.iv.next = add nsw i64 %indvars.iv, 4
    198   %cmp = icmp slt i64 %indvars.iv.next, 128  ; keep looping while the next index is below 128
    199   br i1 %cmp, label %for.body, label %for.end
    200 
    201 for.end:                                          ; preds = %for.body
    202   %add28 = fadd float 1.000000e+01, %mul10  ; result is not used by any later instruction
    203   %add29 = fadd float %mul10, %mul15
    204   %add30 = fadd float %add29, %mul20
    205   %add31 = fadd float %add30, %mul25
    206   ret float %add31
    207 }
    208 
    209 define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
    210 ; CHECK-LABEL: @test(
    211 ;
    212 ; Test that we correctly recognize the discontiguous memory in arrays where the
    213 ; size is less than the alignment, and through various different GEP formations.
    214 ;
    215 ; We disable the vectorization of x86_fp80 for now. 
        ;
        ;   %i1 - first input; elements 0 and 1 are loaded in the entry block.
        ;   %i2 - second input; elements 0 and 1 are loaded only on the %then path.
        ;   %o  - output; elements 0 and 1 receive the merged pair.
        ; The directives below require the scalar x86_fp80 loads to remain and forbid
        ; any <2 x x86_fp80> insertelement, phi or extractelement in the output.
    216 
    217 entry:
    218   %i1.0 = load x86_fp80, x86_fp80* %i1, align 16  ; element 0, loaded straight through the argument pointer
    219   %i1.gep1 = getelementptr x86_fp80, x86_fp80* %i1, i64 1  ; element 1, GEP without inbounds
    220   %i1.1 = load x86_fp80, x86_fp80* %i1.gep1, align 16
    221 ; CHECK: load x86_fp80, x86_fp80*
    222 ; CHECK: load x86_fp80, x86_fp80*
    223 ; CHECK-NOT: insertelement <2 x x86_fp80>
    224 ; CHECK-NOT: insertelement <2 x x86_fp80>
    225   br i1 undef, label %then, label %end  ; the branch condition is undef
    226 
    227 then:
    228   %i2.gep0 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 0  ; element 0 via an explicit zero-index inbounds GEP
    229   %i2.0 = load x86_fp80, x86_fp80* %i2.gep0, align 16
    230   %i2.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 1  ; element 1, inbounds GEP
    231   %i2.1 = load x86_fp80, x86_fp80* %i2.gep1, align 16
    232 ; CHECK: load x86_fp80, x86_fp80*
    233 ; CHECK: load x86_fp80, x86_fp80*
    234 ; CHECK-NOT: insertelement <2 x x86_fp80>
    235 ; CHECK-NOT: insertelement <2 x x86_fp80>
    236   br label %end
    237 
    238 end:
    239   %phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ]  ; element 0 from whichever path was taken
    240   %phi1 = phi x86_fp80 [ %i1.1, %entry ], [ %i2.1, %then ]  ; element 1 from whichever path was taken
    241 ; CHECK-NOT: phi <2 x x86_fp80>
    242 ; CHECK-NOT: extractelement <2 x x86_fp80>
    243 ; CHECK-NOT: extractelement <2 x x86_fp80>
    244   store x86_fp80 %phi0, x86_fp80* %o, align 16  ; o[0]
    245   %o.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %o, i64 1
    246   store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16  ; o[1]
    247   ret void
    248 }
    249