; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

; SLP vectorizer test for intrinsic calls on x86-64.
; The command line above pipes this file through opt (basicaa, slp-vectorizer,
; dce) and hands the textual IR to FileCheck. Each function below applies one
; scalar intrinsic to adjacent elements; the positive cases expect one vector
; intrinsic call, the *_neg cases expect none.
; NOTE(review): -slp-threshold=-999 presumably lowers the profitability bar so
; vectorization is attempted regardless of the cost model - confirm against the
; opt option documentation.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

      6 declare double @llvm.fabs.f64(double) nounwind readnone
      7 
      8 ;CHECK-LABEL: @vec_fabs_f64(
      9 ;CHECK: load <2 x double>
     10 ;CHECK: load <2 x double>
     11 ;CHECK: call <2 x double> @llvm.fabs.v2f64
     12 ;CHECK: store <2 x double>
     13 ;CHECK: ret
     14 define void @vec_fabs_f64(double* %a, double* %b, double* %c) {
     15 entry:
     16   %i0 = load double, double* %a, align 8
     17   %i1 = load double, double* %b, align 8
     18   %mul = fmul double %i0, %i1
     19   %call = tail call double @llvm.fabs.f64(double %mul) nounwind readnone
     20   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
     21   %i3 = load double, double* %arrayidx3, align 8
     22   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
     23   %i4 = load double, double* %arrayidx4, align 8
     24   %mul5 = fmul double %i3, %i4
     25   %call5 = tail call double @llvm.fabs.f64(double %mul5) nounwind readnone
     26   store double %call, double* %c, align 8
     27   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
     28   store double %call5, double* %arrayidx5, align 8
     29   ret void
     30 }
     31 
     32 declare float @llvm.copysign.f32(float, float) nounwind readnone
     33 
     34 ;CHECK-LABEL: @vec_copysign_f32(
     35 ;CHECK: load <4 x float>
     36 ;CHECK: load <4 x float>
     37 ;CHECK: call <4 x float> @llvm.copysign.v4f32
     38 ;CHECK: store <4 x float>
     39 ;CHECK: ret
     40 define void @vec_copysign_f32(float* %a, float* %b, float* noalias %c) {
     41 entry:
     42   %0 = load float, float* %a, align 4
     43   %1 = load float, float* %b, align 4
     44   %call0 = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
     45   store float %call0, float* %c, align 4
     46 
     47   %ix2 = getelementptr inbounds float, float* %a, i64 1
     48   %2 = load float, float* %ix2, align 4
     49   %ix3 = getelementptr inbounds float, float* %b, i64 1
     50   %3 = load float, float* %ix3, align 4
     51   %call1 = tail call float @llvm.copysign.f32(float %2, float %3) nounwind readnone
     52   %c1 = getelementptr inbounds float, float* %c, i64 1
     53   store float %call1, float* %c1, align 4
     54 
     55   %ix4 = getelementptr inbounds float, float* %a, i64 2
     56   %4 = load float, float* %ix4, align 4
     57   %ix5 = getelementptr inbounds float, float* %b, i64 2
     58   %5 = load float, float* %ix5, align 4
     59   %call2 = tail call float @llvm.copysign.f32(float %4, float %5) nounwind readnone
     60   %c2 = getelementptr inbounds float, float* %c, i64 2
     61   store float %call2, float* %c2, align 4
     62 
     63   %ix6 = getelementptr inbounds float, float* %a, i64 3
     64   %6 = load float, float* %ix6, align 4
     65   %ix7 = getelementptr inbounds float, float* %b, i64 3
     66   %7 = load float, float* %ix7, align 4
     67   %call3 = tail call float @llvm.copysign.f32(float %6, float %7) nounwind readnone
     68   %c3 = getelementptr inbounds float, float* %c, i64 3
     69   store float %call3, float* %c3, align 4
     70 
     71   ret void
     72 }
     73 
     74 declare i32 @llvm.bswap.i32(i32) nounwind readnone
     75 
     76 define void @vec_bswap_i32(i32* %a, i32* %b, i32* %c) {
     77 entry:
     78   %i0 = load i32, i32* %a, align 4
     79   %i1 = load i32, i32* %b, align 4
     80   %add1 = add i32 %i0, %i1
     81   %call1 = tail call i32 @llvm.bswap.i32(i32 %add1) nounwind readnone
     82 
     83   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
     84   %i2 = load i32, i32* %arrayidx2, align 4
     85   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
     86   %i3 = load i32, i32* %arrayidx3, align 4
     87   %add2 = add i32 %i2, %i3
     88   %call2 = tail call i32 @llvm.bswap.i32(i32 %add2) nounwind readnone
     89 
     90   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
     91   %i4 = load i32, i32* %arrayidx4, align 4
     92   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
     93   %i5 = load i32, i32* %arrayidx5, align 4
     94   %add3 = add i32 %i4, %i5
     95   %call3 = tail call i32 @llvm.bswap.i32(i32 %add3) nounwind readnone
     96 
     97   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
     98   %i6 = load i32, i32* %arrayidx6, align 4
     99   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
    100   %i7 = load i32, i32* %arrayidx7, align 4
    101   %add4 = add i32 %i6, %i7
    102   %call4 = tail call i32 @llvm.bswap.i32(i32 %add4) nounwind readnone
    103 
    104   store i32 %call1, i32* %c, align 4
    105   %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
    106   store i32 %call2, i32* %arrayidx8, align 4
    107   %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
    108   store i32 %call3, i32* %arrayidx9, align 4
    109   %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
    110   store i32 %call4, i32* %arrayidx10, align 4
    111   ret void
    112 
    113 ; CHECK-LABEL: @vec_bswap_i32(
    114 ; CHECK: load <4 x i32>
    115 ; CHECK: load <4 x i32>
    116 ; CHECK: call <4 x i32> @llvm.bswap.v4i32
    117 ; CHECK: store <4 x i32>
    118 ; CHECK: ret
    119 }
    120 
    121 declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone
    122 
    123 define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
    124 entry:
    125   %i0 = load i32, i32* %a, align 4
    126   %i1 = load i32, i32* %b, align 4
    127   %add1 = add i32 %i0, %i1
    128   %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
    129 
    130   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
    131   %i2 = load i32, i32* %arrayidx2, align 4
    132   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
    133   %i3 = load i32, i32* %arrayidx3, align 4
    134   %add2 = add i32 %i2, %i3
    135   %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone
    136 
    137   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
    138   %i4 = load i32, i32* %arrayidx4, align 4
    139   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
    140   %i5 = load i32, i32* %arrayidx5, align 4
    141   %add3 = add i32 %i4, %i5
    142   %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
    143 
    144   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
    145   %i6 = load i32, i32* %arrayidx6, align 4
    146   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
    147   %i7 = load i32, i32* %arrayidx7, align 4
    148   %add4 = add i32 %i6, %i7
    149   %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone
    150 
    151   store i32 %call1, i32* %c, align 4
    152   %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
    153   store i32 %call2, i32* %arrayidx8, align 4
    154   %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
    155   store i32 %call3, i32* %arrayidx9, align 4
    156   %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
    157   store i32 %call4, i32* %arrayidx10, align 4
    158   ret void
    159 
    160 ; CHECK-LABEL: @vec_ctlz_i32(
    161 ; CHECK: load <4 x i32>
    162 ; CHECK: load <4 x i32>
    163 ; CHECK: call <4 x i32> @llvm.ctlz.v4i32
    164 ; CHECK: store <4 x i32>
    165 ; CHECK: ret
    166 }
    167 
    168 define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
    169 entry:
    170   %i0 = load i32, i32* %a, align 4
    171   %i1 = load i32, i32* %b, align 4
    172   %add1 = add i32 %i0, %i1
    173   %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
    174 
    175   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
    176   %i2 = load i32, i32* %arrayidx2, align 4
    177   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
    178   %i3 = load i32, i32* %arrayidx3, align 4
    179   %add2 = add i32 %i2, %i3
    180   %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone
    181 
    182   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
    183   %i4 = load i32, i32* %arrayidx4, align 4
    184   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
    185   %i5 = load i32, i32* %arrayidx5, align 4
    186   %add3 = add i32 %i4, %i5
    187   %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
    188 
    189   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
    190   %i6 = load i32, i32* %arrayidx6, align 4
    191   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
    192   %i7 = load i32, i32* %arrayidx7, align 4
    193   %add4 = add i32 %i6, %i7
    194   %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone
    195 
    196   store i32 %call1, i32* %c, align 4
    197   %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
    198   store i32 %call2, i32* %arrayidx8, align 4
    199   %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
    200   store i32 %call3, i32* %arrayidx9, align 4
    201   %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
    202   store i32 %call4, i32* %arrayidx10, align 4
    203   ret void
    204 
    205 ; CHECK-LABEL: @vec_ctlz_i32_neg(
    206 ; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
    207 
    208 }
    209 
    210 
    211 declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone
    212 
    213 define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
    214 entry:
    215   %i0 = load i32, i32* %a, align 4
    216   %i1 = load i32, i32* %b, align 4
    217   %add1 = add i32 %i0, %i1
    218   %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
    219 
    220   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
    221   %i2 = load i32, i32* %arrayidx2, align 4
    222   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
    223   %i3 = load i32, i32* %arrayidx3, align 4
    224   %add2 = add i32 %i2, %i3
    225   %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone
    226 
    227   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
    228   %i4 = load i32, i32* %arrayidx4, align 4
    229   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
    230   %i5 = load i32, i32* %arrayidx5, align 4
    231   %add3 = add i32 %i4, %i5
    232   %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
    233 
    234   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
    235   %i6 = load i32, i32* %arrayidx6, align 4
    236   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
    237   %i7 = load i32, i32* %arrayidx7, align 4
    238   %add4 = add i32 %i6, %i7
    239   %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone
    240 
    241   store i32 %call1, i32* %c, align 4
    242   %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
    243   store i32 %call2, i32* %arrayidx8, align 4
    244   %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
    245   store i32 %call3, i32* %arrayidx9, align 4
    246   %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
    247   store i32 %call4, i32* %arrayidx10, align 4
    248   ret void
    249 
    250 ; CHECK-LABEL: @vec_cttz_i32(
    251 ; CHECK: load <4 x i32>
    252 ; CHECK: load <4 x i32>
    253 ; CHECK: call <4 x i32> @llvm.cttz.v4i32
    254 ; CHECK: store <4 x i32>
    255 ; CHECK: ret
    256 }
    257 
    258 define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
    259 entry:
    260   %i0 = load i32, i32* %a, align 4
    261   %i1 = load i32, i32* %b, align 4
    262   %add1 = add i32 %i0, %i1
    263   %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
    264 
    265   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
    266   %i2 = load i32, i32* %arrayidx2, align 4
    267   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
    268   %i3 = load i32, i32* %arrayidx3, align 4
    269   %add2 = add i32 %i2, %i3
    270   %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone
    271 
    272   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
    273   %i4 = load i32, i32* %arrayidx4, align 4
    274   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
    275   %i5 = load i32, i32* %arrayidx5, align 4
    276   %add3 = add i32 %i4, %i5
    277   %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
    278 
    279   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
    280   %i6 = load i32, i32* %arrayidx6, align 4
    281   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
    282   %i7 = load i32, i32* %arrayidx7, align 4
    283   %add4 = add i32 %i6, %i7
    284   %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone
    285 
    286   store i32 %call1, i32* %c, align 4
    287   %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
    288   store i32 %call2, i32* %arrayidx8, align 4
    289   %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
    290   store i32 %call3, i32* %arrayidx9, align 4
    291   %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
    292   store i32 %call4, i32* %arrayidx10, align 4
    293   ret void
    294 
    295 ; CHECK-LABEL: @vec_cttz_i32_neg(
    296 ; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
    297 }
    298 
    299 
    300 declare float @llvm.powi.f32(float, i32)
    301 define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
    302 entry:
    303   %i0 = load float, float* %a, align 4
    304   %i1 = load float, float* %b, align 4
    305   %add1 = fadd float %i0, %i1
    306   %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
    307 
    308   %arrayidx2 = getelementptr inbounds float, float* %a, i32 1
    309   %i2 = load float, float* %arrayidx2, align 4
    310   %arrayidx3 = getelementptr inbounds float, float* %b, i32 1
    311   %i3 = load float, float* %arrayidx3, align 4
    312   %add2 = fadd float %i2, %i3
    313   %call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone
    314 
    315   %arrayidx4 = getelementptr inbounds float, float* %a, i32 2
    316   %i4 = load float, float* %arrayidx4, align 4
    317   %arrayidx5 = getelementptr inbounds float, float* %b, i32 2
    318   %i5 = load float, float* %arrayidx5, align 4
    319   %add3 = fadd float %i4, %i5
    320   %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
    321 
    322   %arrayidx6 = getelementptr inbounds float, float* %a, i32 3
    323   %i6 = load float, float* %arrayidx6, align 4
    324   %arrayidx7 = getelementptr inbounds float, float* %b, i32 3
    325   %i7 = load float, float* %arrayidx7, align 4
    326   %add4 = fadd float %i6, %i7
    327   %call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone
    328 
    329   store float %call1, float* %c, align 4
    330   %arrayidx8 = getelementptr inbounds float, float* %c, i32 1
    331   store float %call2, float* %arrayidx8, align 4
    332   %arrayidx9 = getelementptr inbounds float, float* %c, i32 2
    333   store float %call3, float* %arrayidx9, align 4
    334   %arrayidx10 = getelementptr inbounds float, float* %c, i32 3
    335   store float %call4, float* %arrayidx10, align 4
    336   ret void
    337 
    338 ; CHECK-LABEL: @vec_powi_f32(
    339 ; CHECK: load <4 x float>
    340 ; CHECK: load <4 x float>
    341 ; CHECK: call <4 x float> @llvm.powi.v4f32
    342 ; CHECK: store <4 x float>
    343 ; CHECK: ret
    344 }
    345 
    346 
    347 define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
    348 entry:
    349   %i0 = load float, float* %a, align 4
    350   %i1 = load float, float* %b, align 4
    351   %add1 = fadd float %i0, %i1
    352   %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
    353 
    354   %arrayidx2 = getelementptr inbounds float, float* %a, i32 1
    355   %i2 = load float, float* %arrayidx2, align 4
    356   %arrayidx3 = getelementptr inbounds float, float* %b, i32 1
    357   %i3 = load float, float* %arrayidx3, align 4
    358   %add2 = fadd float %i2, %i3
    359   %call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone
    360 
    361   %arrayidx4 = getelementptr inbounds float, float* %a, i32 2
    362   %i4 = load float, float* %arrayidx4, align 4
    363   %arrayidx5 = getelementptr inbounds float, float* %b, i32 2
    364   %i5 = load float, float* %arrayidx5, align 4
    365   %add3 = fadd float %i4, %i5
    366   %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
    367 
    368   %arrayidx6 = getelementptr inbounds float, float* %a, i32 3
    369   %i6 = load float, float* %arrayidx6, align 4
    370   %arrayidx7 = getelementptr inbounds float, float* %b, i32 3
    371   %i7 = load float, float* %arrayidx7, align 4
    372   %add4 = fadd float %i6, %i7
    373   %call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone
    374 
    375   store float %call1, float* %c, align 4
    376   %arrayidx8 = getelementptr inbounds float, float* %c, i32 1
    377   store float %call2, float* %arrayidx8, align 4
    378   %arrayidx9 = getelementptr inbounds float, float* %c, i32 2
    379   store float %call3, float* %arrayidx9, align 4
    380   %arrayidx10 = getelementptr inbounds float, float* %c, i32 3
    381   store float %call4, float* %arrayidx10, align 4
    382   ret void
    383 
    384 ; CHECK-LABEL: @vec_powi_f32_neg(
    385 ; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
    386 }
    387