Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      4 target triple = "x86_64-apple-macosx10.8.0"
      5 
      6 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }          ; eight consecutive i8 fields (8 bytes)
      7 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }  ; eight consecutive i32 fields (32 bytes)
      8 
      9 ; CHECK: merge_const_store
     10 ; Store the byte constants 1..8 as one 64-bit integer (0x0807060504030201).
     11 ; CHECK: movabsq $578437695752307201
     12 ; CHECK: ret
     13 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
     14   %1 = icmp sgt i32 %count, 0                          ; the loop runs only when %count > 0
     15   br i1 %1, label %.lr.ph, label %._crit_edge
     16 .lr.ph:                                                ; loop body: fills one %struct.A per iteration
     17   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]          ; iteration counter, starts at 0
     18   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]   ; struct written this iteration, starts at %p
     19   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
     20   store i8 1, i8* %2, align 1                          ; fields 0..7 receive the constants 1..8 in order
     21   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
     22   store i8 2, i8* %3, align 1
     23   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
     24   store i8 3, i8* %4, align 1
     25   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
     26   store i8 4, i8* %5, align 1
     27   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
     28   store i8 5, i8* %6, align 1
     29   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
     30   store i8 6, i8* %7, align 1
     31   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
     32   store i8 7, i8* %8, align 1
     33   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
     34   store i8 8, i8* %9, align 1
     35   %10 = add nsw i32 %i.02, 1
     36   %11 = getelementptr inbounds %struct.A* %.01, i64 1  ; advance to the next %struct.A
     37   %exitcond = icmp eq i32 %10, %count                  ; stop after %count iterations
     38   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     39 ._crit_edge:
     40   ret void
     41 }
     42 
     43 ; The function is marked noimplicitfloat, so the eight zero stores must not be turned into a vector store.
     44 ; CHECK: merge_const_store_no_vec
     45 ; CHECK-NOT: vmovups
     46 ; CHECK: ret
     47 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
     48   %1 = icmp sgt i32 %count, 0                          ; the loop runs only when %count > 0
     49   br i1 %1, label %.lr.ph, label %._crit_edge
     50 .lr.ph:                                                ; loop body: zeroes one %struct.B per iteration
     51   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]          ; iteration counter, starts at 0
     52   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]   ; struct written this iteration, starts at %p
     53   %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
     54   store i32 0, i32* %2, align 4                        ; all eight i32 fields receive 0
     55   %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
     56   store i32 0, i32* %3, align 4
     57   %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
     58   store i32 0, i32* %4, align 4
     59   %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
     60   store i32 0, i32* %5, align 4
     61   %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
     62   store i32 0, i32* %6, align 4
     63   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
     64   store i32 0, i32* %7, align 4
     65   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
     66   store i32 0, i32* %8, align 4
     67   %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
     68   store i32 0, i32* %9, align 4
     69   %10 = add nsw i32 %i.02, 1
     70   %11 = getelementptr inbounds %struct.B* %.01, i64 1  ; advance to the next %struct.B
     71   %exitcond = icmp eq i32 %10, %count                  ; stop after %count iterations
     72   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     73 ._crit_edge:
     74   ret void
     75 }
     76 
     77 ; Same body as merge_const_store_no_vec but without noimplicitfloat: the zero stores are expected to use a vector store.
     78 ; CHECK: merge_const_store_vec
     79 ; CHECK: vmovups
     80 ; CHECK: ret
     81 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
     82   %1 = icmp sgt i32 %count, 0                          ; the loop runs only when %count > 0
     83   br i1 %1, label %.lr.ph, label %._crit_edge
     84 .lr.ph:                                                ; loop body: zeroes one %struct.B per iteration
     85   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]          ; iteration counter, starts at 0
     86   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]   ; struct written this iteration, starts at %p
     87   %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
     88   store i32 0, i32* %2, align 4                        ; all eight i32 fields receive 0
     89   %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
     90   store i32 0, i32* %3, align 4
     91   %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
     92   store i32 0, i32* %4, align 4
     93   %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
     94   store i32 0, i32* %5, align 4
     95   %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
     96   store i32 0, i32* %6, align 4
     97   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
     98   store i32 0, i32* %7, align 4
     99   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
    100   store i32 0, i32* %8, align 4
    101   %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
    102   store i32 0, i32* %9, align 4
    103   %10 = add nsw i32 %i.02, 1
    104   %11 = getelementptr inbounds %struct.B* %.01, i64 1  ; advance to the next %struct.B
    105   %exitcond = icmp eq i32 %10, %count                  ; stop after %count iterations
    106   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    107 ._crit_edge:
    108   ret void
    109 }
    110 
    111 ; Merge the first 4 constant bytes into one 32-bit integer store (0x04030201). The remaining four bytes, starting at the non-constant %zz, are expected as separate byte stores.
    112 ; CHECK: merge_nonconst_store
    113 ; CHECK: movl $67305985
    114 ; CHECK: movb
    115 ; CHECK: movb
    116 ; CHECK: movb
    117 ; CHECK: movb
    118 ; CHECK: ret
    119 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
    120   %1 = icmp sgt i32 %count, 0                          ; the loop runs only when %count > 0
    121   br i1 %1, label %.lr.ph, label %._crit_edge
    122 .lr.ph:                                                ; loop body: fills one %struct.A per iteration
    123   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]          ; iteration counter, starts at 0
    124   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]   ; struct written this iteration, starts at %p
    125   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
    126   store i8 1, i8* %2, align 1                          ; fields 0..3 receive the constants 1..4
    127   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
    128   store i8 2, i8* %3, align 1
    129   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
    130   store i8 3, i8* %4, align 1
    131   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
    132   store i8 4, i8* %5, align 1
    133   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
    134   store i8 %zz, i8* %6, align 1                     ;  <----------- Not a constant: field 4 receives the %zz argument
    135   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
    136   store i8 6, i8* %7, align 1                          ; fields 5..7 receive the constants 6..8
    137   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
    138   store i8 7, i8* %8, align 1
    139   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
    140   store i8 8, i8* %9, align 1
    141   %10 = add nsw i32 %i.02, 1
    142   %11 = getelementptr inbounds %struct.A* %.01, i64 1  ; advance to the next %struct.A
    143   %exitcond = icmp eq i32 %10, %count                  ; stop after %count iterations
    144   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    145 ._crit_edge:
    146   ret void
    147 }
    148 
    149 
    150 ;CHECK: merge_loads_i16
    151 ; load: the two adjacent i8 loads from %q are expected as a single 16-bit move
    152 ;CHECK: movw
    153 ; store: the two adjacent i8 stores to the current %p element are expected as a single 16-bit move
    154 ;CHECK: movw
    155 ;CHECK: ret
    156 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
    157   %1 = icmp sgt i32 %count, 0                      ; the loop runs only when %count > 0
    158   br i1 %1, label %.lr.ph, label %._crit_edge
    159 
    160 .lr.ph:                                           ; preds = %0
    161   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0   ; source addresses (fields 0 and 1 of %q) are computed once, before the loop
    162   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
    163   br label %4
    164 
    165 ; <label>:4                                       ; preds = %4, %.lr.ph
    166   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]       ; iteration counter, starts at 0
    167   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ] ; destination struct for this iteration, starts at %p
    168   %5 = load i8* %2, align 1                        ; both loads come first ...
    169   %6 = load i8* %3, align 1
    170   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
    171   store i8 %5, i8* %7, align 1                     ; ... then both stores, to fields 0 and 1 of the destination
    172   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
    173   store i8 %6, i8* %8, align 1
    174   %9 = add nsw i32 %i.02, 1
    175   %10 = getelementptr inbounds %struct.A* %.01, i64 1   ; advance to the next destination %struct.A
    176   %exitcond = icmp eq i32 %9, %count               ; stop after %count iterations
    177   br i1 %exitcond, label %._crit_edge, label %4
    178 
    179 ._crit_edge:                                      ; preds = %4, %0
    180   ret void
    181 }
    182 
    183 ; The loads and the stores are interleaved (load, store, load, store), so they can't be merged.
    184 ;CHECK: no_merge_loads
    185 ;CHECK: movb
    186 ;CHECK: movb
    187 ;CHECK: movb
    188 ;CHECK: movb
    189 ;CHECK: ret
    190 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
    191   %1 = icmp sgt i32 %count, 0                      ; the loop runs only when %count > 0
    192   br i1 %1, label %.lr.ph, label %._crit_edge
    193 
    194 .lr.ph:                                           ; preds = %0
    195   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0   ; source addresses (fields 0 and 1 of %q) are computed once, before the loop
    196   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
    197   br label %a4
    198 
    199 a4:                                       ; preds = %a4, %.lr.ph
    200   %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]     ; iteration counter, starts at 0
    201   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ] ; destination struct for this iteration, starts at %p
    202   %a5 = load i8* %2, align 1                       ; first load ...
    203   %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
    204   store i8 %a5, i8* %a7, align 1                   ; ... is stored before the second load is issued
    205   %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
    206   %a6 = load i8* %3, align 1                       ; second load sits between the two stores
    207   store i8 %a6, i8* %a8, align 1
    208   %a9 = add nsw i32 %i.02, 1
    209   %a10 = getelementptr inbounds %struct.A* %.01, i64 1   ; advance to the next destination %struct.A
    210   %exitcond = icmp eq i32 %a9, %count              ; stop after %count iterations
    211   br i1 %exitcond, label %._crit_edge, label %a4
    212 
    213 ._crit_edge:                                      ; preds = %a4, %0
    214   ret void
    215 }
    216 
    217 
    218 ;CHECK: merge_loads_integer
    219 ; load: the two adjacent i32 loads from %q are expected as a single 64-bit move
    220 ;CHECK: movq
    221 ; store: the two adjacent i32 stores to the current %p element are expected as a single 64-bit move
    222 ;CHECK: movq
    223 ;CHECK: ret
    224 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    225   %1 = icmp sgt i32 %count, 0                      ; the loop runs only when %count > 0
    226   br i1 %1, label %.lr.ph, label %._crit_edge
    227 
    228 .lr.ph:                                           ; preds = %0
    229   %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0   ; source addresses (fields 0 and 1 of %q) are computed once, before the loop
    230   %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
    231   br label %4
    232 
    233 ; <label>:4                                       ; preds = %4, %.lr.ph
    234   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]       ; iteration counter, starts at 0
    235   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ] ; destination struct for this iteration, starts at %p
    236   %5 = load i32* %2                                ; no explicit alignment is given on these loads and stores
    237   %6 = load i32* %3
    238   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
    239   store i32 %5, i32* %7
    240   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
    241   store i32 %6, i32* %8
    242   %9 = add nsw i32 %i.02, 1
    243   %10 = getelementptr inbounds %struct.B* %.01, i64 1   ; advance to the next destination %struct.B
    244   %exitcond = icmp eq i32 %9, %count               ; stop after %count iterations
    245   br i1 %exitcond, label %._crit_edge, label %4
    246 
    247 ._crit_edge:                                      ; preds = %4, %0
    248   ret void
    249 }
    250 
    251 
    252 ;CHECK: merge_loads_vector
    253 ; load: the four adjacent i32 loads from %q are expected as a single unaligned vector move
    254 ;CHECK: movups
    255 ; store: the four adjacent i32 stores to the current %p element are expected as a single unaligned vector move
    256 ;CHECK: movups
    257 ;CHECK: ret
    258 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    259   %a1 = icmp sgt i32 %count, 0                     ; the loop runs only when %count > 0
    260   br i1 %a1, label %.lr.ph, label %._crit_edge
    261 
    262 .lr.ph:                                           ; preds = %0
    263   %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0   ; source addresses (fields 0..3 of %q) are computed once, before the loop
    264   %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
    265   %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
    266   %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
    267   br label %block4
    268 
    269 block4:                                       ; preds = %block4, %.lr.ph
    270   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] ; iteration counter, starts at 0
    271   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; destination struct for this iteration, starts at %p
    272   %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0   ; destination addresses: fields 0..3 of the current struct
    273   %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
    274   %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
    275   %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
    276   %b1 = load i32* %a2                              ; all four loads come first (no explicit alignment given) ...
    277   %b2 = load i32* %a3
    278   %b3 = load i32* %a4
    279   %b4 = load i32* %a5
    280   store i32 %b1, i32* %a7                          ; ... followed by all four stores
    281   store i32 %b2, i32* %a8
    282   store i32 %b3, i32* %a9
    283   store i32 %b4, i32* %a10
    284   %c9 = add nsw i32 %i.02, 1
    285   %c10 = getelementptr inbounds %struct.B* %.01, i64 1   ; advance to the next destination %struct.B
    286   %exitcond = icmp eq i32 %c9, %count              ; stop after %count iterations
    287   br i1 %exitcond, label %._crit_edge, label %block4
    288 
    289 ._crit_edge:                                      ; preds = %block4, %0
    290   ret void
    291 }
    292 
    293 ;CHECK: merge_loads_no_align
    294 ; load: every access is marked align 1; four separate 32-bit loads are expected
    295 ;CHECK: movl
    296 ;CHECK: movl
    297 ;CHECK: movl
    298 ;CHECK: movl
    299 ; store: likewise, four separate 32-bit stores are expected
    300 ;CHECK: movl
    301 ;CHECK: movl
    302 ;CHECK: movl
    303 ;CHECK: movl
    304 ;CHECK: ret
    305 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    306   %a1 = icmp sgt i32 %count, 0                     ; the loop runs only when %count > 0
    307   br i1 %a1, label %.lr.ph, label %._crit_edge
    308 
    309 .lr.ph:                                           ; preds = %0
    310   %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0   ; source addresses (fields 0..3 of %q) are computed once, before the loop
    311   %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
    312   %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
    313   %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
    314   br label %block4
    315 
    316 block4:                                       ; preds = %block4, %.lr.ph
    317   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] ; iteration counter, starts at 0
    318   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; destination struct for this iteration, starts at %p
    319   %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0   ; destination addresses: fields 0..3 of the current struct
    320   %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
    321   %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
    322   %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
    323   %b1 = load i32* %a2, align 1                     ; same load/store sequence as merge_loads_vector, but with explicit align 1
    324   %b2 = load i32* %a3, align 1
    325   %b3 = load i32* %a4, align 1
    326   %b4 = load i32* %a5, align 1
    327   store i32 %b1, i32* %a7, align 1
    328   store i32 %b2, i32* %a8, align 1
    329   store i32 %b3, i32* %a9, align 1
    330   store i32 %b4, i32* %a10, align 1
    331   %c9 = add nsw i32 %i.02, 1
    332   %c10 = getelementptr inbounds %struct.B* %.01, i64 1   ; advance to the next destination %struct.B
    333   %exitcond = icmp eq i32 %c9, %count              ; stop after %count iterations
    334   br i1 %exitcond, label %._crit_edge, label %block4
    335 
    336 ._crit_edge:                                      ; preds = %block4, %0
    337   ret void
    338 }
    339 
    340