Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -instcombine -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
      2 
      3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
        ; The layout string above describes a little-endian target ("e") with
        ; 32-bit pointers ("p:32:32:32"), consistent with the i386 triple passed
        ; to opt on the first line of this file.
      4 
      5 
      6 
      7 ; Make sure we order the operands of commutative operations so that we get
      8 ; bigger vectorizable trees.
      9 
     10 ; CHECK-LABEL: shuffle_operands1
     11 ; CHECK:         load <2 x double>
     12 ; CHECK:         fadd <2 x double>
     13 
     14 define void @shuffle_operands1(double * noalias %from, double * noalias %to,
     15                                double %v1, double %v2) {
          ; Reads two adjacent doubles from %from, adds %v1 to the first and %v2
          ; to the second, and writes the sums to two adjacent doubles at %to.
          ; The loaded value is the first operand of one fadd and the second
          ; operand of the other, so the two scalar chains only line up if the
          ; operands of one fadd are swapped. The patterns above this function
          ; expect a <2 x double> load and a <2 x double> fadd in the output.
     16   %from_1 = getelementptr double *%from, i64 1
     17   %v0_1 = load double * %from
     18   %v0_2 = load double * %from_1
          ; First sum: loaded value on the left, argument on the right.
     19   %v1_1 = fadd double %v0_1, %v1
          ; Second sum: argument on the left, loaded value on the right.
     20   %v1_2 = fadd double %v2, %v0_2
     21   %to_2 = getelementptr double * %to, i64 1
     22   store double %v1_1, double *%to
     23   store double %v1_2, double *%to_2
     24   ret void
     25 }
     26 
     27 ; CHECK-LABEL: shuffle_preserve_broadcast
     28 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
     29 ; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
     30 define void @shuffle_preserve_broadcast(double * noalias %from,
     31                                         double * noalias %to,
     32                                         double %v1, double %v2) {
        ; Loop whose body stores two sums to adjacent doubles at %to. Both sums
        ; use the first load %v0_1 as their FIRST operand; the other operands are
        ; the phi %p (first sum) and the second load %v0_2 (second sum).
        ; The patterns above this function expect %v0_1 to be inserted into both
        ; elements of a single <2 x double>.
        ; The arguments %v1 and %v2 are not referenced in the body.
     33 entry:
     34 br label %lp
     35 
     36 lp:
          ; %p is 0.0 when entered from %entry and 1.0 on the back edge.
     37   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
     38   %from_1 = getelementptr double *%from, i64 1
     39   %v0_1 = load double * %from
     40   %v0_2 = load double * %from_1
          ; %v0_1 on the left in both additions.
     41   %v1_1 = fadd double %v0_1, %p
     42   %v1_2 = fadd double %v0_1, %v0_2
     43   %to_2 = getelementptr double * %to, i64 1
     44   store double %v1_1, double *%to
     45   store double %v1_2, double *%to_2
        ; The loop condition is undef; only the loop shape matters here.
     46 br i1 undef, label %lp, label %ext
     47 
     48 ext:
     49   ret void
     50 }
     51 
     52 ; CHECK-LABEL: shuffle_preserve_broadcast2
     53 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
     54 ; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
     55 define void @shuffle_preserve_broadcast2(double * noalias %from,
     56                                         double * noalias %to,
     57                                         double %v1, double %v2) {
        ; Loop whose body stores two sums to adjacent doubles at %to. Both sums
        ; use the first load %v0_1 as their SECOND operand; the other operands
        ; are the phi %p (first sum) and the second load %v0_2 (second sum).
        ; The patterns above this function expect %v0_1 to be inserted into both
        ; elements of a single <2 x double>.
        ; The arguments %v1 and %v2 are not referenced in the body.
     58 entry:
     59 br label %lp
     60 
     61 lp:
          ; %p is 0.0 when entered from %entry and 1.0 on the back edge.
     62   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
     63   %from_1 = getelementptr double *%from, i64 1
     64   %v0_1 = load double * %from
     65   %v0_2 = load double * %from_1
          ; %v0_1 on the right in both additions.
     66   %v1_1 = fadd double %p, %v0_1
     67   %v1_2 = fadd double %v0_2, %v0_1
     68   %to_2 = getelementptr double * %to, i64 1
     69   store double %v1_1, double *%to
     70   store double %v1_2, double *%to_2
        ; The loop condition is undef; only the loop shape matters here.
     71 br i1 undef, label %lp, label %ext
     72 
     73 ext:
     74   ret void
     75 }
     76 
     77 ; CHECK-LABEL: shuffle_preserve_broadcast3
     78 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
     79 ; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
     80 define void @shuffle_preserve_broadcast3(double * noalias %from,
     81                                         double * noalias %to,
     82                                         double %v1, double %v2) {
        ; Loop whose body stores two sums to adjacent doubles at %to. The first
        ; load %v0_1 is the SECOND operand of the first sum and the FIRST operand
        ; of the second sum; the other operands are the phi %p (first sum) and
        ; the second load %v0_2 (second sum).
        ; The patterns above this function expect %v0_1 to be inserted into both
        ; elements of a single <2 x double>.
        ; The arguments %v1 and %v2 are not referenced in the body.
     83 entry:
     84 br label %lp
     85 
     86 lp:
          ; %p is 0.0 when entered from %entry and 1.0 on the back edge.
     87   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
     88   %from_1 = getelementptr double *%from, i64 1
     89   %v0_1 = load double * %from
     90   %v0_2 = load double * %from_1
          ; %v0_1 on the right here ...
     91   %v1_1 = fadd double %p, %v0_1
          ; ... and on the left here.
     92   %v1_2 = fadd double %v0_1, %v0_2
     93   %to_2 = getelementptr double * %to, i64 1
     94   store double %v1_1, double *%to
     95   store double %v1_2, double *%to_2
        ; The loop condition is undef; only the loop shape matters here.
     96 br i1 undef, label %lp, label %ext
     97 
     98 ext:
     99   ret void
    100 }
    101 
    102 
    103 ; CHECK-LABEL: shuffle_preserve_broadcast4
    104 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
    105 ; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
    106 define void @shuffle_preserve_broadcast4(double * noalias %from,
    107                                         double * noalias %to,
    108                                         double %v1, double %v2) {
        ; Loop whose body stores two sums to adjacent doubles at %to. Both sums
        ; use the first load %v0_1 as their SECOND operand; the other operands
        ; are the second load %v0_2 (first sum) and the phi %p (second sum).
        ; The patterns above this function expect %v0_1 to be inserted into both
        ; elements of a single <2 x double>.
        ; The arguments %v1 and %v2 are not referenced in the body.
    109 entry:
    110 br label %lp
    111 
    112 lp:
          ; %p is 0.0 when entered from %entry and 1.0 on the back edge.
    113   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
    114   %from_1 = getelementptr double *%from, i64 1
    115   %v0_1 = load double * %from
    116   %v0_2 = load double * %from_1
          ; %v0_1 on the right in both additions; the phi feeds the second sum.
    117   %v1_1 = fadd double %v0_2, %v0_1
    118   %v1_2 = fadd double %p, %v0_1
    119   %to_2 = getelementptr double * %to, i64 1
    120   store double %v1_1, double *%to
    121   store double %v1_2, double *%to_2
        ; The loop condition is undef; only the loop shape matters here.
    122 br i1 undef, label %lp, label %ext
    123 
    124 ext:
    125   ret void
    126 }
    127 
    128 ; CHECK-LABEL: shuffle_preserve_broadcast5
    129 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
    130 ; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
    131 define void @shuffle_preserve_broadcast5(double * noalias %from,
    132                                         double * noalias %to,
    133                                         double %v1, double %v2) {
        ; Loop whose body stores two sums to adjacent doubles at %to. The first
        ; load %v0_1 is the FIRST operand of the first sum and the SECOND operand
        ; of the second sum; the other operands are the second load %v0_2 (first
        ; sum) and the phi %p (second sum).
        ; The patterns above this function expect %v0_1 to be inserted into both
        ; elements of a single <2 x double>.
        ; The arguments %v1 and %v2 are not referenced in the body.
    134 entry:
    135 br label %lp
    136 
    137 lp:
          ; %p is 0.0 when entered from %entry and 1.0 on the back edge.
    138   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
    139   %from_1 = getelementptr double *%from, i64 1
    140   %v0_1 = load double * %from
    141   %v0_2 = load double * %from_1
          ; %v0_1 on the left here ...
    142   %v1_1 = fadd double %v0_1, %v0_2
          ; ... and on the right here.
    143   %v1_2 = fadd double %p, %v0_1
    144   %to_2 = getelementptr double * %to, i64 1
    145   store double %v1_1, double *%to
    146   store double %v1_2, double *%to_2
        ; The loop condition is undef; only the loop shape matters here.
    147 br i1 undef, label %lp, label %ext
    148 
    149 ext:
    150   ret void
    151 }
    152 
    153 
    154 ; CHECK-LABEL: shuffle_preserve_broadcast6
    155 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
    156 ; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
    157 define void @shuffle_preserve_broadcast6(double * noalias %from,
    158                                         double * noalias %to,
    159                                         double %v1, double %v2) {
        ; Loop whose body stores two sums to adjacent doubles at %to. Both sums
        ; use the first load %v0_1 as their FIRST operand; the other operands are
        ; the second load %v0_2 (first sum) and the phi %p (second sum).
        ; The patterns above this function expect %v0_1 to be inserted into both
        ; elements of a single <2 x double>.
        ; The arguments %v1 and %v2 are not referenced in the body.
    160 entry:
    161 br label %lp
    162 
    163 lp:
          ; %p is 0.0 when entered from %entry and 1.0 on the back edge.
    164   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
    165   %from_1 = getelementptr double *%from, i64 1
    166   %v0_1 = load double * %from
    167   %v0_2 = load double * %from_1
          ; %v0_1 on the left in both additions; the phi feeds the second sum.
    168   %v1_1 = fadd double %v0_1, %v0_2
    169   %v1_2 = fadd double %v0_1, %p
    170   %to_2 = getelementptr double * %to, i64 1
    171   store double %v1_1, double *%to
    172   store double %v1_2, double *%to_2
        ; The loop condition is undef; only the loop shape matters here.
    173 br i1 undef, label %lp, label %ext
    174 
    175 ext:
    176   ret void
    177 }
    178 
    179 ; Make sure that reordering operands does not scramble them and destroy the
    180 ; 'good' source order.
    181 
    182 ; CHECK-LABEL: good_load_order
    183 
    184 ; CHECK: %[[V1:[0-9]+]] = load <4 x float>*
    185 ; CHECK: %[[V2:[0-9]+]] = insertelement <4 x float> undef, float %1, i32 0
    186 ; CHECK: %[[V3:[0-9]+]] = shufflevector <4 x float> %[[V2]], <4 x float> %[[V1]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
    187 ; CHECK:                = fmul <4 x float> %[[V1]], %[[V3]]
    188 
    189 @a = common global [32000 x float] zeroinitializer, align 16
        ; The global above is a zero-initialized array of 32000 floats with
        ; 16-byte alignment; @good_load_order reads and writes it in place.
    190 
    191 define void @good_load_order() {
        ; Walks @a five elements per iteration. For k = iv .. iv+4 the body
        ; computes a[k] = a[k+1] * a[k], where every element is loaded before
        ; the store that overwrites it, and each loaded value is reused as the
        ; second operand of the following multiply. The value of a[iv] is not
        ; reloaded: it arrives through the phi %1.
        ; The patterns above this function expect one <4 x float> load, a second
        ; vector formed by inserting %1 at index 0 and shuffling in elements
        ; 0..2 of the loaded vector (mask 0, 4, 5, 6), and a <4 x float> fmul of
        ; the two.
    192 entry:
    193   br label %for.cond1.preheader
    194 
    195 for.cond1.preheader:
          ; Initial a[0], fed into the phi %1 on loop entry.
    196   %0 = load float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), align 16
    197   br label %for.body3
    198 
    199 for.body3:
          ; %1 holds the element at index %indvars.iv: %0 on entry, otherwise
          ; %10 loaded at the end of the previous iteration.
    200   %1 = phi float [ %0, %for.cond1.preheader ], [ %10, %for.body3 ]
    201   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
          ; a[iv] = a[iv+1] * a[iv]
    202   %2 = add nsw i64 %indvars.iv, 1
    203   %arrayidx = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %2
    204   %3 = load float* %arrayidx, align 4
    205   %arrayidx5 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
    206   %mul6 = fmul float %3, %1
    207   store float %mul6, float* %arrayidx5, align 4
          ; a[iv+1] = a[iv+2] * a[iv+1]
    208   %4 = add nsw i64 %indvars.iv, 2
    209   %arrayidx11 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %4
    210   %5 = load float* %arrayidx11, align 4
    211   %mul15 = fmul float %5, %3
    212   store float %mul15, float* %arrayidx, align 4
          ; a[iv+2] = a[iv+3] * a[iv+2]
    213   %6 = add nsw i64 %indvars.iv, 3
    214   %arrayidx21 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %6
    215   %7 = load float* %arrayidx21, align 4
    216   %mul25 = fmul float %7, %5
    217   store float %mul25, float* %arrayidx11, align 4
          ; a[iv+3] = a[iv+4] * a[iv+3]
    218   %8 = add nsw i64 %indvars.iv, 4
    219   %arrayidx31 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %8
    220   %9 = load float* %arrayidx31, align 4
    221   %mul35 = fmul float %9, %7
    222   store float %mul35, float* %arrayidx21, align 4
          ; a[iv+4] = a[iv+5] * a[iv+4]; %10 (a[iv+5]) is carried to the next
          ; iteration through the phi %1.
    223   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
    224   %arrayidx41 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.next
    225   %10 = load float* %arrayidx41, align 4
    226   %mul45 = fmul float %10, %9
    227   store float %mul45, float* %arrayidx31, align 4
          ; Continue while the next index, truncated to i32, is below 31995
          ; (signed compare).
    228   %11 = trunc i64 %indvars.iv.next to i32
    229   %cmp2 = icmp slt i32 %11, 31995
    230   br i1 %cmp2, label %for.body3, label %for.end
    231 
    232 for.end:
    233   ret void
    234 }
    235 
    235