Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
      2 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
      3 
      4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      5 target triple = "x86_64-apple-macosx10.8.0"
      6 
      7 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }  ; eight i8 fields, used by the byte-sized store/load tests
      8 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }  ; eight i32 fields, used by the 32-bit store/load tests
      9 
     10 ; CHECK: merge_const_store
     11 ; Save the eight byte constants 1,2,...,8 as one 64-bit integer (0x0807060504030201).
     12 ; CHECK: movabsq $578437695752307201
     13 ; CHECK: ret
     14 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {  ; each iteration stores the constants 1..8 to the eight i8 fields of one %struct.A
     15   %1 = icmp sgt i32 %count, 0
     16   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop when %count <= 0
     17 .lr.ph:  ; loop body; branches back to itself
     18   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]  ; iteration counter, starts at 0
     19   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]  ; struct written this iteration, starts at %p
     20   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
     21   store i8 1, i8* %2, align 1
     22   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
     23   store i8 2, i8* %3, align 1
     24   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
     25   store i8 3, i8* %4, align 1
     26   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
     27   store i8 4, i8* %5, align 1
     28   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
     29   store i8 5, i8* %6, align 1
     30   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
     31   store i8 6, i8* %7, align 1
     32   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
     33   store i8 7, i8* %8, align 1
     34   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
     35   store i8 8, i8* %9, align 1
     36   %10 = add nsw i32 %i.02, 1
     37   %11 = getelementptr inbounds %struct.A* %.01, i64 1  ; advance to the next %struct.A
     38   %exitcond = icmp eq i32 %10, %count
     39   br i1 %exitcond, label %._crit_edge, label %.lr.ph  ; leave the loop once the counter equals %count
     40 ._crit_edge:
     41   ret void
     42 }
     43 
     44 ; No vectors because we use noimplicitfloat
     45 ; CHECK: merge_const_store_no_vec
     46 ; CHECK-NOT: vmovups
     47 ; CHECK: ret
     48 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{  ; each iteration stores i32 0 to all eight fields of one %struct.B; noimplicitfloat is the only function attribute
     49   %1 = icmp sgt i32 %count, 0
     50   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop when %count <= 0
     51 .lr.ph:  ; loop body; branches back to itself
     52   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]  ; iteration counter, starts at 0
     53   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]  ; struct written this iteration, starts at %p
     54   %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
     55   store i32 0, i32* %2, align 4
     56   %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
     57   store i32 0, i32* %3, align 4
     58   %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
     59   store i32 0, i32* %4, align 4
     60   %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
     61   store i32 0, i32* %5, align 4
     62   %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
     63   store i32 0, i32* %6, align 4
     64   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
     65   store i32 0, i32* %7, align 4
     66   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
     67   store i32 0, i32* %8, align 4
     68   %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
     69   store i32 0, i32* %9, align 4
     70   %10 = add nsw i32 %i.02, 1
     71   %11 = getelementptr inbounds %struct.B* %.01, i64 1  ; advance to the next %struct.B
     72   %exitcond = icmp eq i32 %10, %count
     73   br i1 %exitcond, label %._crit_edge, label %.lr.ph  ; leave the loop once the counter equals %count
     74 ._crit_edge:
     75   ret void
     76 }
     77 
     78 ; Move the constants using a single vector store.
     79 ; CHECK: merge_const_store_vec
     80 ; CHECK: vmovups
     81 ; CHECK: ret
     82 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {  ; each iteration stores i32 0 to all eight fields of one %struct.B (no noimplicitfloat attribute here)
     83   %1 = icmp sgt i32 %count, 0
     84   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop when %count <= 0
     85 .lr.ph:  ; loop body; branches back to itself
     86   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]  ; iteration counter, starts at 0
     87   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]  ; struct written this iteration, starts at %p
     88   %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
     89   store i32 0, i32* %2, align 4
     90   %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
     91   store i32 0, i32* %3, align 4
     92   %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
     93   store i32 0, i32* %4, align 4
     94   %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
     95   store i32 0, i32* %5, align 4
     96   %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
     97   store i32 0, i32* %6, align 4
     98   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
     99   store i32 0, i32* %7, align 4
    100   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
    101   store i32 0, i32* %8, align 4
    102   %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
    103   store i32 0, i32* %9, align 4
    104   %10 = add nsw i32 %i.02, 1
    105   %11 = getelementptr inbounds %struct.B* %.01, i64 1  ; advance to the next %struct.B
    106   %exitcond = icmp eq i32 %10, %count
    107   br i1 %exitcond, label %._crit_edge, label %.lr.ph  ; leave the loop once the counter equals %count
    108 ._crit_edge:
    109   ret void
    110 }
    111 
    112 ; Move the first 4 constants as a single 32-bit integer store (0x04030201). Move the rest as byte-sized scalars.
    113 ; CHECK: merge_nonconst_store
    114 ; CHECK: movl $67305985
    115 ; CHECK: movb
    116 ; CHECK: movb
    117 ; CHECK: movb
    118 ; CHECK: movb
    119 ; CHECK: ret
    120 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {  ; like the all-constant byte stores, except field 4 receives the argument %zz
    121   %1 = icmp sgt i32 %count, 0
    122   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop when %count <= 0
    123 .lr.ph:  ; loop body; branches back to itself
    124   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]  ; iteration counter, starts at 0
    125   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]  ; struct written this iteration, starts at %p
    126   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
    127   store i8 1, i8* %2, align 1
    128   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
    129   store i8 2, i8* %3, align 1
    130   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
    131   store i8 3, i8* %4, align 1
    132   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
    133   store i8 4, i8* %5, align 1
    134   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
    135   store i8 %zz, i8* %6, align 1                     ;  <----------- Not a constant: interrupts the run of constant stores
    136   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
    137   store i8 6, i8* %7, align 1
    138   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
    139   store i8 7, i8* %8, align 1
    140   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
    141   store i8 8, i8* %9, align 1
    142   %10 = add nsw i32 %i.02, 1
    143   %11 = getelementptr inbounds %struct.A* %.01, i64 1  ; advance to the next %struct.A
    144   %exitcond = icmp eq i32 %10, %count
    145   br i1 %exitcond, label %._crit_edge, label %.lr.ph  ; leave the loop once the counter equals %count
    146 ._crit_edge:
    147   ret void
    148 }
    149 
    150 
    151 ;CHECK-LABEL: merge_loads_i16:
    152 ; load:
    153 ;CHECK: movw
    154 ; store:
    155 ;CHECK: movw
    156 ;CHECK: ret
    157 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {  ; each iteration copies fields 0 and 1 of *%q into fields 0 and 1 of the current %p struct
    158   %1 = icmp sgt i32 %count, 0
    159   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop when %count <= 0
    160 
    161 .lr.ph:                                           ; preds = %0
    162   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0  ; source addresses are computed once, outside the loop
    163   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
    164   br label %4
    165 
    166 ; <label>:4                                       ; preds = %4, %.lr.ph
    167   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]  ; iteration counter, starts at 0
    168   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]  ; destination struct, starts at %p
    169   %5 = load i8* %2, align 1  ; both loads are issued before either store
    170   %6 = load i8* %3, align 1
    171   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
    172   store i8 %5, i8* %7, align 1
    173   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
    174   store i8 %6, i8* %8, align 1
    175   %9 = add nsw i32 %i.02, 1
    176   %10 = getelementptr inbounds %struct.A* %.01, i64 1  ; advance the destination to the next %struct.A
    177   %exitcond = icmp eq i32 %9, %count
    178   br i1 %exitcond, label %._crit_edge, label %4  ; leave the loop once the counter equals %count
    179 
    180 ._crit_edge:                                      ; preds = %4, %0
    181   ret void
    182 }
    183 
    184 ; The loads and the stores are interleaved. Can't merge them.
    185 ;CHECK-LABEL: no_merge_loads:
    186 ;CHECK: movb
    187 ;CHECK: movb
    188 ;CHECK: movb
    189 ;CHECK: movb
    190 ;CHECK: ret
    191 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {  ; same two-byte copy from *%q to the current %p struct, but ordered load, store, load, store
    192   %1 = icmp sgt i32 %count, 0
    193   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop when %count <= 0
    194 
    195 .lr.ph:                                           ; preds = %0
    196   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0  ; source addresses are computed once, outside the loop
    197   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
    198   br label %a4
    199 
    200 a4:                                       ; preds = %a4, %.lr.ph
    201   %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]  ; iteration counter, starts at 0
    202   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]  ; destination struct, starts at %p
    203   %a5 = load i8* %2, align 1  ; first load ...
    204   %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
    205   store i8 %a5, i8* %a7, align 1  ; ... is stored before the second load is issued
    206   %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
    207   %a6 = load i8* %3, align 1  ; second load comes after the first store
    208   store i8 %a6, i8* %a8, align 1
    209   %a9 = add nsw i32 %i.02, 1
    210   %a10 = getelementptr inbounds %struct.A* %.01, i64 1  ; advance the destination to the next %struct.A
    211   %exitcond = icmp eq i32 %a9, %count
    212   br i1 %exitcond, label %._crit_edge, label %a4  ; leave the loop once the counter equals %count
    213 
    214 ._crit_edge:                                      ; preds = %a4, %0
    215   ret void
    216 }
    217 
    218 
    219 ;CHECK-LABEL: merge_loads_integer:
    220 ; load:
    221 ;CHECK: movq
    222 ; store:
    223 ;CHECK: movq
    224 ;CHECK: ret
    225 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {  ; each iteration copies i32 fields 0 and 1 of *%q into fields 0 and 1 of the current %p struct
    226   %1 = icmp sgt i32 %count, 0
    227   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop when %count <= 0
    228 
    229 .lr.ph:                                           ; preds = %0
    230   %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0  ; source addresses are computed once, outside the loop
    231   %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
    232   br label %4
    233 
    234 ; <label>:4                                       ; preds = %4, %.lr.ph
    235   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]  ; iteration counter, starts at 0
    236   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]  ; destination struct, starts at %p
    237   %5 = load i32* %2  ; loads and stores here carry no explicit alignment
    238   %6 = load i32* %3
    239   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
    240   store i32 %5, i32* %7
    241   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
    242   store i32 %6, i32* %8
    243   %9 = add nsw i32 %i.02, 1
    244   %10 = getelementptr inbounds %struct.B* %.01, i64 1  ; advance the destination to the next %struct.B
    245   %exitcond = icmp eq i32 %9, %count
    246   br i1 %exitcond, label %._crit_edge, label %4  ; leave the loop once the counter equals %count
    247 
    248 ._crit_edge:                                      ; preds = %4, %0
    249   ret void
    250 }
    251 
    252 
    253 ;CHECK-LABEL: merge_loads_vector:
    254 ; load:
    255 ;CHECK: movups
    256 ; store:
    257 ;CHECK: movups
    258 ;CHECK: ret
    259 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {  ; each iteration copies i32 fields 0..3 of *%q into fields 0..3 of the current %p struct
    260   %a1 = icmp sgt i32 %count, 0
    261   br i1 %a1, label %.lr.ph, label %._crit_edge  ; skip the loop when %count <= 0
    262 
    263 .lr.ph:                                           ; preds = %0
    264   %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0  ; the four source addresses are computed once, outside the loop
    265   %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
    266   %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
    267   %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
    268   br label %block4
    269 
    270 block4:                                       ; preds = %block4, %.lr.ph
    271   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]  ; iteration counter, starts at 0
    272   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]  ; destination struct, starts at %p
    273   %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
    274   %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
    275   %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
    276   %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
    277   %b1 = load i32* %a2  ; four loads with no explicit alignment, all before the stores
    278   %b2 = load i32* %a3
    279   %b3 = load i32* %a4
    280   %b4 = load i32* %a5
    281   store i32 %b1, i32* %a7  ; four stores with no explicit alignment
    282   store i32 %b2, i32* %a8
    283   store i32 %b3, i32* %a9
    284   store i32 %b4, i32* %a10
    285   %c9 = add nsw i32 %i.02, 1
    286   %c10 = getelementptr inbounds %struct.B* %.01, i64 1  ; advance the destination to the next %struct.B
    287   %exitcond = icmp eq i32 %c9, %count
    288   br i1 %exitcond, label %._crit_edge, label %block4  ; leave the loop once the counter equals %count
    289 
    290 ._crit_edge:                                      ; preds = %block4, %0
    291   ret void
    292 }
    293 
    294 ;CHECK-LABEL: merge_loads_no_align:
    295 ; load:
    296 ;CHECK: movl
    297 ;CHECK: movl
    298 ;CHECK: movl
    299 ;CHECK: movl
    300 ; store:
    301 ;CHECK: movl
    302 ;CHECK: movl
    303 ;CHECK: movl
    304 ;CHECK: movl
    305 ;CHECK: ret
    306 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {  ; same four-field i32 copy as merge_loads_vector, but every load and store is marked align 1
    307   %a1 = icmp sgt i32 %count, 0
    308   br i1 %a1, label %.lr.ph, label %._crit_edge  ; skip the loop when %count <= 0
    309 
    310 .lr.ph:                                           ; preds = %0
    311   %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0  ; the four source addresses are computed once, outside the loop
    312   %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
    313   %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
    314   %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
    315   br label %block4
    316 
    317 block4:                                       ; preds = %block4, %.lr.ph
    318   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]  ; iteration counter, starts at 0
    319   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]  ; destination struct, starts at %p
    320   %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
    321   %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
    322   %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
    323   %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
    324   %b1 = load i32* %a2, align 1  ; align 1 on each i32 access is what distinguishes this test
    325   %b2 = load i32* %a3, align 1
    326   %b3 = load i32* %a4, align 1
    327   %b4 = load i32* %a5, align 1
    328   store i32 %b1, i32* %a7, align 1
    329   store i32 %b2, i32* %a8, align 1
    330   store i32 %b3, i32* %a9, align 1
    331   store i32 %b4, i32* %a10, align 1
    332   %c9 = add nsw i32 %i.02, 1
    333   %c10 = getelementptr inbounds %struct.B* %.01, i64 1  ; advance the destination to the next %struct.B
    334   %exitcond = icmp eq i32 %c9, %count
    335   br i1 %exitcond, label %._crit_edge, label %block4  ; leave the loop once the counter equals %count
    336 
    337 ._crit_edge:                                      ; preds = %block4, %0
    338   ret void
    339 }
    340 
    341 ; Make sure that we merge the consecutive load/store sequence below and use a
    342 ; word (16 bit) instead of a byte copy.
    343 ; CHECK: MergeLoadStoreBaseIndexOffset
    344 ; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
    345 ; CHECK: movw    [[REG]], (%{{.*}})
    346 define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {  ; per iteration: idx = *a++; b[0] = c[idx]; b[1] = c[idx+1]; b += 2
    347   br label %1
    348 
    349 ; <label>:1
    350   %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]  ; remaining iteration count, starts at %n
    351   %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]  ; destination cursor, starts at %b
    352   %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]  ; index-array cursor, starts at %a
    353   %2 = getelementptr inbounds i64* %.0, i64 1
    354   %3 = load i64* %.0, align 1  ; i64 index read from the index array
    355   %4 = getelementptr inbounds i8* %c, i64 %3  ; address %c + index
    356   %5 = load i8* %4, align 1
    357   %6 = add i64 %3, 1  ; index + 1, computed in i64
    358   %7 = getelementptr inbounds i8* %c, i64 %6  ; address %c + index + 1
    359   %8 = load i8* %7, align 1
    360   store i8 %5, i8* %.08, align 1
    361   %9 = getelementptr inbounds i8* %.08, i64 1
    362   store i8 %8, i8* %9, align 1
    363   %10 = getelementptr inbounds i8* %.08, i64 2  ; advance the destination by two bytes
    364   %11 = add nsw i32 %.09, -1
    365   %12 = icmp eq i32 %11, 0
    366   br i1 %12, label %13, label %1  ; the body runs before the count is tested; exit when it reaches 0
    367 
    368 ; <label>:13
    369   ret void
    370 }
    371 
    372 ; Make sure that we merge the consecutive load/store sequence below and use a
    373 ; word (16 bit) instead of a byte copy even if there are intermediate sign
    374 ; extensions.
    375 ; CHECK: MergeLoadStoreBaseIndexOffsetSext
    376 ; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
    377 ; CHECK: movw    [[REG]], (%{{.*}})
    378 define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {  ; per iteration: idx = sext(*a++); b[0] = c[idx]; b[1] = c[idx+1]; b += 2
    379   br label %1
    380 
    381 ; <label>:1
    382   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]  ; remaining iteration count, starts at %n
    383   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]  ; destination cursor, starts at %b
    384   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]  ; index-array cursor, starts at %a
    385   %2 = getelementptr inbounds i8* %.0, i64 1
    386   %3 = load i8* %.0, align 1  ; i8 index read from the index array
    387   %4 = sext i8 %3 to i64  ; index is sign-extended first ...
    388   %5 = getelementptr inbounds i8* %c, i64 %4
    389   %6 = load i8* %5, align 1
    390   %7 = add i64 %4, 1  ; ... and the +1 is applied to the already-extended i64 value
    391   %8 = getelementptr inbounds i8* %c, i64 %7
    392   %9 = load i8* %8, align 1
    393   store i8 %6, i8* %.08, align 1
    394   %10 = getelementptr inbounds i8* %.08, i64 1
    395   store i8 %9, i8* %10, align 1
    396   %11 = getelementptr inbounds i8* %.08, i64 2  ; advance the destination by two bytes
    397   %12 = add nsw i32 %.09, -1
    398   %13 = icmp eq i32 %12, 0
    399   br i1 %13, label %14, label %1  ; the body runs before the count is tested; exit when it reaches 0
    400 
    401 ; <label>:14
    402   ret void
    403 }
    404 
    405 ; However, we can only ignore sign extensions while merging when they are applied
    406 ; to all of the address computations;
    407 ; CHECK: loadStoreBaseIndexOffsetSextNoSex
    408 ; CHECK-NOT: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
    409 ; CHECK-NOT: movw    [[REG]], (%{{.*}})
    410 define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {  ; per iteration: i = *a++; b[0] = c[sext(i)]; b[1] = c[sext(i8(i + 1))]; b += 2
    411   br label %1
    412 
    413 ; <label>:1
    414   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]  ; remaining iteration count, starts at %n
    415   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]  ; destination cursor, starts at %b
    416   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]  ; index-array cursor, starts at %a
    417   %2 = getelementptr inbounds i8* %.0, i64 1
    418   %3 = load i8* %.0, align 1  ; i8 index read from the index array
    419   %4 = sext i8 %3 to i64
    420   %5 = getelementptr inbounds i8* %c, i64 %4
    421   %6 = load i8* %5, align 1
    422   %7 = add i8 %3, 1  ; the +1 is done in i8 (no nsw/nuw flags), before any extension
    423   %wrap.4 = sext i8 %7 to i64  ; only afterwards is the incremented i8 value sign-extended
    424   %8 = getelementptr inbounds i8* %c, i64 %wrap.4
    425   %9 = load i8* %8, align 1
    426   store i8 %6, i8* %.08, align 1
    427   %10 = getelementptr inbounds i8* %.08, i64 1
    428   store i8 %9, i8* %10, align 1
    429   %11 = getelementptr inbounds i8* %.08, i64 2  ; advance the destination by two bytes
    430   %12 = add nsw i32 %.09, -1
    431   %13 = icmp eq i32 %12, 0
    432   br i1 %13, label %14, label %1  ; the body runs before the count is tested; exit when it reaches 0
    433 
    434 ; <label>:14
    435   ret void
    436 }
    437