Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
      2 
      3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
        ; Layout string above: "e" = little-endian, "p:32:32:32" = 32-bit pointers,
        ; "v128:128:128" = 128-bit vectors are 128-bit aligned, "n8:16:32" = native
        ; integer widths, "S128" = 128-bit natural stack alignment.
      4 target triple = "i386-apple-macosx10.8.0"
        ; Same triple that the opt invocation at the top of the file passes via -mtriple.
      5 
      6 ;int test(double *G) {
      7 ;  G[0] = 1+G[5]*4;
      8 ;  G[1] = 6+G[6]*3;
      9 ;  G[2] = 7+G[5]*4;
     10 ;  G[3] = 8+G[6]*4;
     11 ;}
     12 
     13 ;CHECK-LABEL: @test(
     14 ;CHECK: load <2 x double>
     15 ;CHECK: fadd <2 x double>
     16 ;CHECK: store <2 x double>
     17 ;CHECK: insertelement <2 x double>
     18 ;CHECK: fadd <2 x double>
     19 ;CHECK: store <2 x double>
     20 ;CHECK: ret i32
     21 
     22 define i32 @test(double* nocapture %G) {
        ; IR form of the C function test() sketched in the comment above:
        ; four consecutive stores to G[0..3], each computed from G[5] or G[6].
        ; The FileCheck directives above expect the result to contain two
        ; <2 x double> stores, with one insertelement between them.
     23 entry:
     24   %arrayidx = getelementptr inbounds double* %G, i64 5
     25   %0 = load double* %arrayidx, align 8                 ; %0 = G[5]
     26   %mul = fmul double %0, 4.000000e+00                  ; G[5]*4, used for both G[0] and G[2]
     27   %add = fadd double %mul, 1.000000e+00
     28   store double %add, double* %G, align 8               ; G[0] = 1 + G[5]*4
     29   %arrayidx2 = getelementptr inbounds double* %G, i64 6
     30   %1 = load double* %arrayidx2, align 8                ; %1 = G[6]
     31   %mul3 = fmul double %1, 3.000000e+00
     32   %add4 = fadd double %mul3, 6.000000e+00
     33   %arrayidx5 = getelementptr inbounds double* %G, i64 1
     34   store double %add4, double* %arrayidx5, align 8      ; G[1] = 6 + G[6]*3
     35   %add8 = fadd double %mul, 7.000000e+00               ; reuses %mul rather than recomputing G[5]*4
     36   %arrayidx9 = getelementptr inbounds double* %G, i64 2
     37   store double %add8, double* %arrayidx9, align 8      ; G[2] = 7 + G[5]*4
     38   %mul11 = fmul double %1, 4.000000e+00                ; second use of G[6], scaled by 4 this time (not 3)
     39   %add12 = fadd double %mul11, 8.000000e+00
     40   %arrayidx13 = getelementptr inbounds double* %G, i64 3
     41   store double %add12, double* %arrayidx13, align 8    ; G[3] = 8 + G[6]*4
     42   ret i32 undef                                        ; the C sketch has no return statement; the IR returns undef
     43 }
     44 
     45 ;int foo(double *A, int n) {
     46 ;  A[0] = A[0] * 7.9 * n + 6.0;
     47 ;  A[1] = A[1] * 7.7 * n + 2.0;
     48 ;  A[2] = A[2] * 7.6 * n + 3.0;
     49 ;  A[3] = A[3] * 7.4 * n + 4.0;
     50 ;}
     51 ;CHECK-LABEL: @foo(
     52 ;CHECK: insertelement <2 x double>
     53 ;CHECK: insertelement <2 x double>
     54 ;CHECK-NOT: insertelement <2 x double>
     55 ;CHECK: ret
     56 define i32 @foo(double* nocapture %A, i32 %n) {
        ; IR form of the C function foo() sketched above: for i in 0..3,
        ; A[i] = A[i] * k_i * (double)n + c_i, with a different multiplier k_i
        ; (7.9, 7.7, 7.6, 7.4) and a different addend c_i (6, 2, 3, 4) per element.
        ; The FileCheck directives above allow exactly two insertelement
        ; instructions before the return.
        ; NOTE(review): presumably those two build the single shared vector of
        ; %conv, i.e. the gather is emitted once, not once per store pair - confirm.
     57 entry:
     58   %0 = load double* %A, align 8
     59   %mul = fmul double %0, 7.900000e+00
     60   %conv = sitofp i32 %n to double                      ; (double)n, computed once and used by all four elements
     61   %mul1 = fmul double %conv, %mul
     62   %add = fadd double %mul1, 6.000000e+00
     63   store double %add, double* %A, align 8               ; A[0] = A[0]*7.9*n + 6.0
     64   %arrayidx3 = getelementptr inbounds double* %A, i64 1
     65   %1 = load double* %arrayidx3, align 8
     66   %mul4 = fmul double %1, 7.700000e+00
     67   %mul6 = fmul double %conv, %mul4
     68   %add7 = fadd double %mul6, 2.000000e+00
     69   store double %add7, double* %arrayidx3, align 8      ; A[1] = A[1]*7.7*n + 2.0
     70   %arrayidx9 = getelementptr inbounds double* %A, i64 2
     71   %2 = load double* %arrayidx9, align 8
     72   %mul10 = fmul double %2, 7.600000e+00
     73   %mul12 = fmul double %conv, %mul10
     74   %add13 = fadd double %mul12, 3.000000e+00
     75   store double %add13, double* %arrayidx9, align 8     ; A[2] = A[2]*7.6*n + 3.0
     76   %arrayidx15 = getelementptr inbounds double* %A, i64 3
     77   %3 = load double* %arrayidx15, align 8
     78   %mul16 = fmul double %3, 7.400000e+00
     79   %mul18 = fmul double %conv, %mul16
     80   %add19 = fadd double %mul18, 4.000000e+00
     81   store double %add19, double* %arrayidx15, align 8    ; A[3] = A[3]*7.4*n + 4.0
     82   ret i32 undef
     83 }
     84 
     85 ; int test2(double *G, int k) {
     86 ;   if (k) {
     87 ;     G[0] = 1+G[5]*4;
     88 ;     G[1] = 6+G[6]*3;
     89 ;   } else {
     90 ;     G[2] = 7+G[5]*4;
     91 ;     G[3] = 8+G[6]*3;
     92 ;   }
     93 ; }
     94 
     95 ; We can't merge the gather sequences because one does not dominate the other:
        ; they sit in the two sibling branches of the if/else, so all four
        ; insertelement instructions must survive.
        ; The function is anchored with a label check, like the other tests in this
        ; file, so that the checks below can only match inside @test2's own body.
     96 ; CHECK-LABEL: @test2(
     97 ; CHECK: insertelement
     98 ; CHECK: insertelement
     99 ; CHECK: insertelement
    100 ; CHECK: insertelement
    101 ; CHECK: ret
    102 define i32 @test2(double* nocapture %G, i32 %k) {
        ; IR form of the C function test2() sketched above. G[5]*4 is computed once
        ; in the entry block and consumed by both branches.
    103   %1 = icmp eq i32 %k, 0
    104   %2 = getelementptr inbounds double* %G, i64 5
    105   %3 = load double* %2, align 8                        ; %3 = G[5]
    106   %4 = fmul double %3, 4.000000e+00                    ; G[5]*4, shared by both branches
    107   br i1 %1, label %12, label %5                        ; k == 0 -> %12 (else branch); k != 0 -> %5 (then branch)
    108 
    109 ; <label>:5                                       ; preds = %0
    110   %6 = fadd double %4, 1.000000e+00
    111   store double %6, double* %G, align 8                 ; G[0] = 1 + G[5]*4
    112   %7 = getelementptr inbounds double* %G, i64 6
    113   %8 = load double* %7, align 8                        ; %8 = G[6]
    114   %9 = fmul double %8, 3.000000e+00
    115   %10 = fadd double %9, 6.000000e+00
    116   %11 = getelementptr inbounds double* %G, i64 1
    117   store double %10, double* %11, align 8               ; G[1] = 6 + G[6]*3
    118   br label %20
    119 
    120 ; <label>:12                                      ; preds = %0
    121   %13 = fadd double %4, 7.000000e+00
    122   %14 = getelementptr inbounds double* %G, i64 2
    123   store double %13, double* %14, align 8               ; G[2] = 7 + G[5]*4
    124   %15 = getelementptr inbounds double* %G, i64 6
    125   %16 = load double* %15, align 8                      ; G[6] is loaded again in this branch
    126   %17 = fmul double %16, 3.000000e+00
    127   %18 = fadd double %17, 8.000000e+00
    128   %19 = getelementptr inbounds double* %G, i64 3
    129   store double %18, double* %19, align 8               ; G[3] = 8 + G[6]*3
    130   br label %20
    131 
    132 ; <label>:20                                      ; preds = %12, %5
    133   ret i32 undef
    134 }
    135 
    136 
    137 ;int foo4(double *A, int n) {
    138 ;  A[0] = A[0] * 7.9 * n + 6.0;
    139 ;  A[1] = A[1] * 7.9 * n + 6.0;
    140 ;  A[2] = A[2] * 7.9 * n + 6.0;
    141 ;  A[3] = A[3] * 7.9 * n + 6.0;
    142 ;}
    143 ;CHECK-LABEL: @foo4(
    144 ;CHECK: insertelement <2 x double>
    145 ;CHECK: insertelement <2 x double>
    146 ;CHECK-NOT: insertelement <2 x double>
    147 ;CHECK: ret
    148 define i32 @foo4(double* nocapture %A, i32 %n) {
        ; Same shape as @foo, but every element uses the same multiplier (7.9)
        ; and the same addend (6.0): for i in 0..3, A[i] = A[i]*7.9*(double)n + 6.0.
        ; The FileCheck directives above allow exactly two insertelement
        ; instructions before the return.
        ; NOTE(review): presumably the two inserts build the one shared vector of
        ; %conv, reused for both store pairs - confirm against the opt output.
    149 entry:
    150   %0 = load double* %A, align 8
    151   %mul = fmul double %0, 7.900000e+00
    152   %conv = sitofp i32 %n to double                      ; (double)n, computed once and used by all four elements
    153   %mul1 = fmul double %conv, %mul
    154   %add = fadd double %mul1, 6.000000e+00
    155   store double %add, double* %A, align 8               ; A[0] = A[0]*7.9*n + 6.0
    156   %arrayidx3 = getelementptr inbounds double* %A, i64 1
    157   %1 = load double* %arrayidx3, align 8
    158   %mul4 = fmul double %1, 7.900000e+00
    159   %mul6 = fmul double %conv, %mul4
    160   %add7 = fadd double %mul6, 6.000000e+00
    161   store double %add7, double* %arrayidx3, align 8      ; A[1] = A[1]*7.9*n + 6.0
    162   %arrayidx9 = getelementptr inbounds double* %A, i64 2
    163   %2 = load double* %arrayidx9, align 8
    164   %mul10 = fmul double %2, 7.900000e+00
    165   %mul12 = fmul double %conv, %mul10
    166   %add13 = fadd double %mul12, 6.000000e+00
    167   store double %add13, double* %arrayidx9, align 8     ; A[2] = A[2]*7.9*n + 6.0
    168   %arrayidx15 = getelementptr inbounds double* %A, i64 3
    169   %3 = load double* %arrayidx15, align 8
    170   %mul16 = fmul double %3, 7.900000e+00
    171   %mul18 = fmul double %conv, %mul16
    172   %add19 = fadd double %mul18, 6.000000e+00
    173   store double %add19, double* %arrayidx15, align 8    ; A[3] = A[3]*7.9*n + 6.0
    174   ret i32 undef
    175 }
    176 
    177 ;int partial_mrg(double *A, int n) {
    178 ;  A[0] = A[0] * n;
    179 ;  A[1] = A[1] * n;
    180 ;  if (n < 4) return 0;
    181 ;  A[2] = A[2] * n;
    182 ;  A[3] = A[3] * (n+4);
    183 ;}
    184 ;CHECK-LABEL: @partial_mrg(
    185 ;CHECK: insertelement <2 x double>
    186 ;CHECK: insertelement <2 x double>
    187 ;CHECK: insertelement <2 x double>
    188 ;CHECK-NOT: insertelement <2 x double>
    189 ;CHECK: ret
    190 define i32 @partial_mrg(double* nocapture %A, i32 %n) {
        ; IR form of the C function partial_mrg() sketched above. A[0] and A[1]
        ; are scaled by (double)n unconditionally; A[2] and A[3] are updated only
        ; when n >= 4, and A[3] uses (double)(n+4) in place of (double)n.
        ; The FileCheck directives above allow exactly three insertelement
        ; instructions before the return.
        ; NOTE(review): presumably two inserts build <conv, conv> in the entry
        ; block and if.end needs only one more for <conv, conv12>, reusing the
        ; first insert of the dominating sequence - confirm against the opt output.
    191 entry:
    192   %0 = load double* %A, align 8
    193   %conv = sitofp i32 %n to double                      ; (double)n, also used in if.end
    194   %mul = fmul double %conv, %0
    195   store double %mul, double* %A, align 8               ; A[0] = A[0] * n
    196   %arrayidx2 = getelementptr inbounds double* %A, i64 1
    197   %1 = load double* %arrayidx2, align 8
    198   %mul4 = fmul double %conv, %1
    199   store double %mul4, double* %arrayidx2, align 8      ; A[1] = A[1] * n
    200   %cmp = icmp slt i32 %n, 4
    201   br i1 %cmp, label %return, label %if.end             ; n < 4 -> skip the second pair of stores
    202 
    203 if.end:                                           ; preds = %entry
    204   %arrayidx7 = getelementptr inbounds double* %A, i64 2
    205   %2 = load double* %arrayidx7, align 8
    206   %mul9 = fmul double %conv, %2                        ; reuses %conv defined in the entry block
    207   store double %mul9, double* %arrayidx7, align 8      ; A[2] = A[2] * n
    208   %arrayidx11 = getelementptr inbounds double* %A, i64 3
    209   %3 = load double* %arrayidx11, align 8
    210   %add = add nsw i32 %n, 4
    211   %conv12 = sitofp i32 %add to double                  ; (double)(n+4): the one element whose scale factor differs
    212   %mul13 = fmul double %conv12, %3
    213   store double %mul13, double* %arrayidx11, align 8    ; A[3] = A[3] * (n+4)
    214   br label %return
    215 
    216 return:                                           ; preds = %entry, %if.end
    217   ret i32 0                                            ; both paths return 0
    218 }
    219 
    220