Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      4 target triple = "x86_64-apple-macosx10.8.0"
      5 
      6 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
      7 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
      8 
      9 ; CHECK-LABEL: merge_const_store:
     10 ; Each loop iteration stores the byte constants 1..8 to the eight consecutive i8 fields of one %struct.A; expect them saved as one 64-bit immediate (578437695752307201 == 0x0807060504030201, byte 1 in the low-order position).
     11 ; CHECK: movabsq $578437695752307201
     12 ; CHECK: ret
     13 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
     14   %1 = icmp sgt i32 %count, 0                     ; the loop is skipped when count <= 0
     15   br i1 %1, label %.lr.ph, label %._crit_edge
     16 .lr.ph:                                           ; loop body: fills one %struct.A per iteration
     17   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]     ; iteration counter, starts at 0
     18   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written by this iteration, starts at %p
     19   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
     20   store i8 1, i8* %2, align 1                     ; fields 0..7 receive the constants 1..8 in order
     21   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
     22   store i8 2, i8* %3, align 1
     23   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
     24   store i8 3, i8* %4, align 1
     25   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
     26   store i8 4, i8* %5, align 1
     27   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
     28   store i8 5, i8* %6, align 1
     29   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
     30   store i8 6, i8* %7, align 1
     31   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
     32   store i8 7, i8* %8, align 1
     33   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
     34   store i8 8, i8* %9, align 1
     35   %10 = add nsw i32 %i.02, 1
     36   %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance to the next struct
     37   %exitcond = icmp eq i32 %10, %count
     38   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     39 ._crit_edge:
     40   ret void
     41 }
     42 
     43 ; Eight consecutive i32 zero stores per iteration, but the function is marked noimplicitfloat, so no vector store (vmovups) may be used to merge them.
     44 ; CHECK-LABEL: merge_const_store_no_vec:
     45 ; CHECK-NOT: vmovups
     46 ; CHECK: ret
     47 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
     48   %1 = icmp sgt i32 %count, 0                     ; the loop is skipped when count <= 0
     49   br i1 %1, label %.lr.ph, label %._crit_edge
     50 .lr.ph:                                           ; loop body: zeroes one %struct.B per iteration
     51   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]     ; iteration counter, starts at 0
     52   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written by this iteration, starts at %p
     53   %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
     54   store i32 0, i32* %2, align 4                   ; fields 0..7 are all set to zero, in order
     55   %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
     56   store i32 0, i32* %3, align 4
     57   %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
     58   store i32 0, i32* %4, align 4
     59   %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
     60   store i32 0, i32* %5, align 4
     61   %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
     62   store i32 0, i32* %6, align 4
     63   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
     64   store i32 0, i32* %7, align 4
     65   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
     66   store i32 0, i32* %8, align 4
     67   %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
     68   store i32 0, i32* %9, align 4
     69   %10 = add nsw i32 %i.02, 1
     70   %11 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance to the next struct
     71   %exitcond = icmp eq i32 %10, %count
     72   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     73 ._crit_edge:
     74   ret void
     75 }
     76 
     77 ; Eight consecutive i32 zero stores per iteration with no noimplicitfloat restriction: move the constants using a single vector store (vmovups).
     78 ; CHECK-LABEL: merge_const_store_vec:
     79 ; CHECK: vmovups
     80 ; CHECK: ret
     81 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
     82   %1 = icmp sgt i32 %count, 0                     ; the loop is skipped when count <= 0
     83   br i1 %1, label %.lr.ph, label %._crit_edge
     84 .lr.ph:                                           ; loop body: zeroes one %struct.B per iteration
     85   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]     ; iteration counter, starts at 0
     86   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written by this iteration, starts at %p
     87   %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
     88   store i32 0, i32* %2, align 4                   ; fields 0..7 are all set to zero, in order
     89   %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
     90   store i32 0, i32* %3, align 4
     91   %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
     92   store i32 0, i32* %4, align 4
     93   %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
     94   store i32 0, i32* %5, align 4
     95   %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
     96   store i32 0, i32* %6, align 4
     97   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
     98   store i32 0, i32* %7, align 4
     99   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
    100   store i32 0, i32* %8, align 4
    101   %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
    102   store i32 0, i32* %9, align 4
    103   %10 = add nsw i32 %i.02, 1
    104   %11 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance to the next struct
    105   %exitcond = icmp eq i32 %10, %count
    106   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    107 ._crit_edge:
    108   ret void
    109 }
    110 
    111 ; Field 4 receives the non-constant %zz, which splits the run of constants. Move the first 4 constants as a single 32-bit immediate store (67305985 == 0x04030201). Move the rest as byte-sized scalars.
    112 ; CHECK-LABEL: merge_nonconst_store:
    113 ; CHECK: movl $67305985
    114 ; CHECK: movb
    115 ; CHECK: movb
    116 ; CHECK: movb
    117 ; CHECK: movb
    118 ; CHECK: ret
    119 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
    120   %1 = icmp sgt i32 %count, 0                     ; the loop is skipped when count <= 0
    121   br i1 %1, label %.lr.ph, label %._crit_edge
    122 .lr.ph:                                           ; loop body: fills one %struct.A per iteration
    123   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]     ; iteration counter, starts at 0
    124   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written by this iteration, starts at %p
    125   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
    126   store i8 1, i8* %2, align 1                     ; fields 0..3 receive the constants 1..4
    127   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
    128   store i8 2, i8* %3, align 1
    129   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
    130   store i8 3, i8* %4, align 1
    131   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
    132   store i8 4, i8* %5, align 1
    133   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
    134   store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
    135   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
    136   store i8 6, i8* %7, align 1                     ; fields 5..7 receive the constants 6..8
    137   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
    138   store i8 7, i8* %8, align 1
    139   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
    140   store i8 8, i8* %9, align 1
    141   %10 = add nsw i32 %i.02, 1
    142   %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance to the next struct
    143   %exitcond = icmp eq i32 %10, %count
    144   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    145 ._crit_edge:
    146   ret void
    147 }
    148 
    149 ; Two adjacent i8 loads from %q feed two adjacent i8 stores into %p (both noalias); expect one 16-bit load and one 16-bit store.
    150 ;CHECK-LABEL: merge_loads_i16:
    151 ; load:
    152 ;CHECK: movw
    153 ; store:
    154 ;CHECK: movw
    155 ;CHECK: ret
    156 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
    157   %1 = icmp sgt i32 %count, 0                     ; the loop is skipped when count <= 0
    158   br i1 %1, label %.lr.ph, label %._crit_edge
    159 
    160 .lr.ph:                                           ; preds = %0
    161   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 ; source addresses (fields 0 and 1 of %q) are computed once, outside the loop
    162   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
    163   br label %4
    164 
    165 ; <label>:4                                       ; preds = %4, %.lr.ph
    166   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]      ; iteration counter, starts at 0
    167   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ] ; destination struct for this iteration, starts at %p
    168   %5 = load i8* %2, align 1                       ; both loads are issued before either store
    169   %6 = load i8* %3, align 1
    170   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
    171   store i8 %5, i8* %7, align 1
    172   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
    173   store i8 %6, i8* %8, align 1
    174   %9 = add nsw i32 %i.02, 1
    175   %10 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance to the next destination struct
    176   %exitcond = icmp eq i32 %9, %count
    177   br i1 %exitcond, label %._crit_edge, label %4
    178 
    179 ._crit_edge:                                      ; preds = %4, %0
    180   ret void
    181 }
    182 
    183 ; The loads and the stores are interleaved (load, store, load, store). Can't merge them; expect four separate byte moves.
    184 ;CHECK-LABEL: no_merge_loads:
    185 ;CHECK: movb
    186 ;CHECK: movb
    187 ;CHECK: movb
    188 ;CHECK: movb
    189 ;CHECK: ret
    190 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
    191   %1 = icmp sgt i32 %count, 0                     ; the loop is skipped when count <= 0
    192   br i1 %1, label %.lr.ph, label %._crit_edge
    193 
    194 .lr.ph:                                           ; preds = %0
    195   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 ; source addresses (fields 0 and 1 of %q) are computed once, outside the loop
    196   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
    197   br label %a4
    198 
    199 a4:                                       ; preds = %a4, %.lr.ph
    200   %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]    ; iteration counter, starts at 0
    201   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ] ; destination struct for this iteration, starts at %p
    202   %a5 = load i8* %2, align 1
    203   %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
    204   store i8 %a5, i8* %a7, align 1                  ; the first store sits between the two loads
    205   %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
    206   %a6 = load i8* %3, align 1
    207   store i8 %a6, i8* %a8, align 1
    208   %a9 = add nsw i32 %i.02, 1
    209   %a10 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance to the next destination struct
    210   %exitcond = icmp eq i32 %a9, %count
    211   br i1 %exitcond, label %._crit_edge, label %a4
    212 
    213 ._crit_edge:                                      ; preds = %a4, %0
    214   ret void
    215 }
    216 
    217 ; Two adjacent i32 loads from %q feed two adjacent i32 stores into %p (both noalias); expect one 64-bit load and one 64-bit store.
    218 ;CHECK-LABEL: merge_loads_integer:
    219 ; load:
    220 ;CHECK: movq
    221 ; store:
    222 ;CHECK: movq
    223 ;CHECK: ret
    224 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    225   %1 = icmp sgt i32 %count, 0                     ; the loop is skipped when count <= 0
    226   br i1 %1, label %.lr.ph, label %._crit_edge
    227 
    228 .lr.ph:                                           ; preds = %0
    229   %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 ; source addresses (fields 0 and 1 of %q) are computed once, outside the loop
    230   %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
    231   br label %4
    232 
    233 ; <label>:4                                       ; preds = %4, %.lr.ph
    234   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]      ; iteration counter, starts at 0
    235   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ] ; destination struct for this iteration, starts at %p
    236   %5 = load i32* %2                               ; both loads are issued before either store; no explicit alignment is given
    237   %6 = load i32* %3
    238   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
    239   store i32 %5, i32* %7
    240   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
    241   store i32 %6, i32* %8
    242   %9 = add nsw i32 %i.02, 1
    243   %10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance to the next destination struct
    244   %exitcond = icmp eq i32 %9, %count
    245   br i1 %exitcond, label %._crit_edge, label %4
    246 
    247 ._crit_edge:                                      ; preds = %4, %0
    248   ret void
    249 }
    250 
    251 ; Four adjacent i32 loads from %q feed four adjacent i32 stores into %p (both noalias); expect one unaligned vector load and one unaligned vector store.
    252 ;CHECK-LABEL: merge_loads_vector:
    253 ; load:
    254 ;CHECK: movups
    255 ; store:
    256 ;CHECK: movups
    257 ;CHECK: ret
    258 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    259   %a1 = icmp sgt i32 %count, 0                    ; the loop is skipped when count <= 0
    260   br i1 %a1, label %.lr.ph, label %._crit_edge
    261 
    262 .lr.ph:                                           ; preds = %0
    263   %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 ; source addresses (fields 0..3 of %q) are computed once, outside the loop
    264   %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
    265   %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
    266   %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
    267   br label %block4
    268 
    269 block4:                                       ; preds = %block4, %.lr.ph
    270   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] ; iteration counter, starts at 0
    271   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; destination struct for this iteration, starts at %p
    272   %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 ; destination addresses: fields 0..3 of the current struct
    273   %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
    274   %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
    275   %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
    276   %b1 = load i32* %a2                             ; all four loads are issued before any store; no explicit alignment is given
    277   %b2 = load i32* %a3
    278   %b3 = load i32* %a4
    279   %b4 = load i32* %a5
    280   store i32 %b1, i32* %a7
    281   store i32 %b2, i32* %a8
    282   store i32 %b3, i32* %a9
    283   store i32 %b4, i32* %a10
    284   %c9 = add nsw i32 %i.02, 1
    285   %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance to the next destination struct
    286   %exitcond = icmp eq i32 %c9, %count
    287   br i1 %exitcond, label %._crit_edge, label %block4
    288 
    289 ._crit_edge:                                      ; preds = %block4, %0
    290   ret void
    291 }
    292 ; Four adjacent i32 loads and stores as in merge_loads_vector, but every access is marked align 1; expect four separate 32-bit loads and four separate 32-bit stores.
    293 ;CHECK-LABEL: merge_loads_no_align:
    294 ; load:
    295 ;CHECK: movl
    296 ;CHECK: movl
    297 ;CHECK: movl
    298 ;CHECK: movl
    299 ; store:
    300 ;CHECK: movl
    301 ;CHECK: movl
    302 ;CHECK: movl
    303 ;CHECK: movl
    304 ;CHECK: ret
    305 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    306   %a1 = icmp sgt i32 %count, 0                    ; the loop is skipped when count <= 0
    307   br i1 %a1, label %.lr.ph, label %._crit_edge
    308 
    309 .lr.ph:                                           ; preds = %0
    310   %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 ; source addresses (fields 0..3 of %q) are computed once, outside the loop
    311   %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
    312   %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
    313   %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
    314   br label %block4
    315 
    316 block4:                                       ; preds = %block4, %.lr.ph
    317   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] ; iteration counter, starts at 0
    318   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; destination struct for this iteration, starts at %p
    319   %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 ; destination addresses: fields 0..3 of the current struct
    320   %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
    321   %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
    322   %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
    323   %b1 = load i32* %a2, align 1                    ; all four loads are issued before any store; each access is only byte-aligned
    324   %b2 = load i32* %a3, align 1
    325   %b3 = load i32* %a4, align 1
    326   %b4 = load i32* %a5, align 1
    327   store i32 %b1, i32* %a7, align 1
    328   store i32 %b2, i32* %a8, align 1
    329   store i32 %b3, i32* %a9, align 1
    330   store i32 %b4, i32* %a10, align 1
    331   %c9 = add nsw i32 %i.02, 1
    332   %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance to the next destination struct
    333   %exitcond = icmp eq i32 %c9, %count
    334   br i1 %exitcond, label %._crit_edge, label %block4
    335 
    336 ._crit_edge:                                      ; preds = %block4, %0
    337   ret void
    338 }
    339 
    340 ; Make sure that we merge the consecutive load/store sequence below and use a
    341 ; word (16 bit) instead of a byte copy. The two source bytes are at %c + index and %c + index + 1, where the i64 index is loaded from %a.
    342 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
    343 ; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
    344 ; CHECK: movw    [[REG]], (%{{.*}})
    345 define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
    346   br label %1
    347 
    348 ; <label>:1
    349   %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]          ; remaining iterations, counts down from %n
    350   %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]          ; destination cursor, advances 2 bytes per iteration
    351   %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]           ; index cursor, advances one i64 per iteration
    352   %2 = getelementptr inbounds i64* %.0, i64 1
    353   %3 = load i64* %.0, align 1                     ; index into %c for this iteration
    354   %4 = getelementptr inbounds i8* %c, i64 %3
    355   %5 = load i8* %4, align 1                       ; byte at %c[index]
    356   %6 = add i64 %3, 1
    357   %7 = getelementptr inbounds i8* %c, i64 %6
    358   %8 = load i8* %7, align 1                       ; byte at %c[index + 1]
    359   store i8 %5, i8* %.08, align 1
    360   %9 = getelementptr inbounds i8* %.08, i64 1
    361   store i8 %8, i8* %9, align 1
    362   %10 = getelementptr inbounds i8* %.08, i64 2
    363   %11 = add nsw i32 %.09, -1
    364   %12 = icmp eq i32 %11, 0                        ; exit once the counter reaches zero (the body always runs at least once)
    365   br i1 %12, label %13, label %1
    366 
    367 ; <label>:13
    368   ret void
    369 }
    370 
    371 ; Make sure that we merge the consecutive load/store sequence below and use a
    372 ; word (16 bit) instead of a byte copy even if there are intermediate sign
    373 ; extensions. Here the i8 index is sign-extended to i64 first and the +1 is added in i64.
    374 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
    375 ; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
    376 ; CHECK: movw    [[REG]], (%{{.*}})
    377 define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
    378   br label %1
    379 
    380 ; <label>:1
    381   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]          ; remaining iterations, counts down from %n
    382   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]          ; destination cursor, advances 2 bytes per iteration
    383   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]            ; index cursor, advances one byte per iteration
    384   %2 = getelementptr inbounds i8* %.0, i64 1
    385   %3 = load i8* %.0, align 1                      ; i8 index for this iteration
    386   %4 = sext i8 %3 to i64                          ; a single sign extension is shared by both address computations
    387   %5 = getelementptr inbounds i8* %c, i64 %4
    388   %6 = load i8* %5, align 1                       ; byte at %c[index]
    389   %7 = add i64 %4, 1
    390   %8 = getelementptr inbounds i8* %c, i64 %7
    391   %9 = load i8* %8, align 1                       ; byte at %c[index + 1]
    392   store i8 %6, i8* %.08, align 1
    393   %10 = getelementptr inbounds i8* %.08, i64 1
    394   store i8 %9, i8* %10, align 1
    395   %11 = getelementptr inbounds i8* %.08, i64 2
    396   %12 = add nsw i32 %.09, -1
    397   %13 = icmp eq i32 %12, 0                        ; exit once the counter reaches zero (the body always runs at least once)
    398   br i1 %13, label %14, label %1
    399 
    400 ; <label>:14
    401   ret void
    402 }
    403 
    404 ; However, sign extensions can only be ignored when merging if the same extension feeds every address
    405 ; computation. Here the +1 is added in i8 before the second sext, so it can wrap and the two loads must not be merged into a word copy.
    406 ; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
    407 ; CHECK-NOT: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
    408 ; CHECK-NOT: movw    [[REG]], (%{{.*}})
    409 define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
    410   br label %1
    411 
    412 ; <label>:1
    413   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]          ; remaining iterations, counts down from %n
    414   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]          ; destination cursor, advances 2 bytes per iteration
    415   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]            ; index cursor, advances one byte per iteration
    416   %2 = getelementptr inbounds i8* %.0, i64 1
    417   %3 = load i8* %.0, align 1                      ; i8 index for this iteration
    418   %4 = sext i8 %3 to i64
    419   %5 = getelementptr inbounds i8* %c, i64 %4
    420   %6 = load i8* %5, align 1                       ; byte at %c[sext(index)]
    421   %7 = add i8 %3, 1                               ; increment performed in i8, before extension, with no nsw/nuw flag
    422   %wrap.4 = sext i8 %7 to i64
    423   %8 = getelementptr inbounds i8* %c, i64 %wrap.4
    424   %9 = load i8* %8, align 1                       ; byte at %c[sext(index + 1)], not necessarily adjacent to the first
    425   store i8 %6, i8* %.08, align 1
    426   %10 = getelementptr inbounds i8* %.08, i64 1
    427   store i8 %9, i8* %10, align 1
    428   %11 = getelementptr inbounds i8* %.08, i64 2
    429   %12 = add nsw i32 %.09, -1
    430   %13 = icmp eq i32 %12, 0                        ; exit once the counter reaches zero (the body always runs at least once)
    431   br i1 %13, label %14, label %1
    432 
    433 ; <label>:14
    434   ret void
    435 }
    436