Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
      2 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
        ; The file is compiled twice for x86-64/corei7 with AVX enabled (the second
        ; time with -addr-sink-using-gep=1); both outputs are verified against the
        ; same default-prefix FileCheck directives below.
      3 
      4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      5 target triple = "x86_64-apple-macosx10.8.0"
      6 
      7 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }  ; eight i8 fields
      8 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }  ; eight i32 fields
      9 
     10 ; CHECK-LABEL: merge_const_store:
     11 ; Store the eight adjacent i8 constants 1..8 as one 64-bit immediate
        ; (578437695752307201 == 0x0807060504030201).
     12 ; CHECK: movabsq $578437695752307201
     13 ; CHECK: ret
     14 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
     15   %1 = icmp sgt i32 %count, 0
     16   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop unless %count > 0
     17 .lr.ph:
     18   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]  ; iteration counter, starts at 0
     19   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]  ; current struct, starts at %p
     20   %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
     21   store i8 1, i8* %2, align 1
     22   %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
     23   store i8 2, i8* %3, align 1
     24   %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
     25   store i8 3, i8* %4, align 1
     26   %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
     27   store i8 4, i8* %5, align 1
     28   %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
     29   store i8 5, i8* %6, align 1
     30   %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
     31   store i8 6, i8* %7, align 1
     32   %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
     33   store i8 7, i8* %8, align 1
     34   %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
     35   store i8 8, i8* %9, align 1
     36   %10 = add nsw i32 %i.02, 1
     37   %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1  ; advance to the next %struct.A
     38   %exitcond = icmp eq i32 %10, %count
     39   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     40 ._crit_edge:
     41   ret void
     42 }
     43 
     44 ; The function is marked noimplicitfloat, so the eight zero stores must not be
        ; merged into a vector store: no vmovups may appear before the ret.
     45 ; CHECK-LABEL: merge_const_store_no_vec:
     46 ; CHECK-NOT: vmovups
     47 ; CHECK: ret
     48 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
     49   %1 = icmp sgt i32 %count, 0
     50   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop unless %count > 0
     51 .lr.ph:
     52   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]  ; iteration counter, starts at 0
     53   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]  ; current struct, starts at %p
     54   %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
     55   store i32 0, i32* %2, align 4
     56   %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
     57   store i32 0, i32* %3, align 4
     58   %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
     59   store i32 0, i32* %4, align 4
     60   %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
     61   store i32 0, i32* %5, align 4
     62   %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
     63   store i32 0, i32* %6, align 4
     64   %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
     65   store i32 0, i32* %7, align 4
     66   %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
     67   store i32 0, i32* %8, align 4
     68   %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
     69   store i32 0, i32* %9, align 4
     70   %10 = add nsw i32 %i.02, 1
     71   %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1  ; advance to the next %struct.B
     72   %exitcond = icmp eq i32 %10, %count
     73   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     74 ._crit_edge:
     75   ret void
     76 }
     77 
     78 ; Same IR body as merge_const_store_no_vec but without noimplicitfloat: the
        ; eight zero i32 stores are expected to be emitted using a vector store.
     79 ; CHECK-LABEL: merge_const_store_vec:
     80 ; CHECK: vmovups
     81 ; CHECK: ret
     82 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
     83   %1 = icmp sgt i32 %count, 0
     84   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop unless %count > 0
     85 .lr.ph:
     86   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]  ; iteration counter, starts at 0
     87   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]  ; current struct, starts at %p
     88   %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
     89   store i32 0, i32* %2, align 4
     90   %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
     91   store i32 0, i32* %3, align 4
     92   %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
     93   store i32 0, i32* %4, align 4
     94   %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
     95   store i32 0, i32* %5, align 4
     96   %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
     97   store i32 0, i32* %6, align 4
     98   %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
     99   store i32 0, i32* %7, align 4
    100   %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
    101   store i32 0, i32* %8, align 4
    102   %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
    103   store i32 0, i32* %9, align 4
    104   %10 = add nsw i32 %i.02, 1
    105   %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1  ; advance to the next %struct.B
    106   %exitcond = icmp eq i32 %10, %count
    107   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    108 ._crit_edge:
    109   ret void
    110 }
    111 
    112 ; Move the first 4 constants as a single 32-bit integer store
        ; (67305985 == 0x04030201). The fifth store is not a constant, so the
        ; remaining bytes are expected to be stored as scalars.
    113 ; CHECK-LABEL: merge_nonconst_store:
    114 ; CHECK: movl $67305985
    115 ; CHECK: movb
    116 ; CHECK: movb
    117 ; CHECK: movb
    118 ; CHECK: movb
    119 ; CHECK: ret
    120 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
    121   %1 = icmp sgt i32 %count, 0
    122   br i1 %1, label %.lr.ph, label %._crit_edge  ; skip the loop unless %count > 0
    123 .lr.ph:
    124   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]  ; iteration counter, starts at 0
    125   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]  ; current struct, starts at %p
    126   %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
    127   store i8 1, i8* %2, align 1
    128   %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
    129   store i8 2, i8* %3, align 1
    130   %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
    131   store i8 3, i8* %4, align 1
    132   %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
    133   store i8 4, i8* %5, align 1
    134   %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
    135   store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
    136   %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
    137   store i8 6, i8* %7, align 1
    138   %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
    139   store i8 7, i8* %8, align 1
    140   %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
    141   store i8 8, i8* %9, align 1
    142   %10 = add nsw i32 %i.02, 1
    143   %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1  ; advance to the next %struct.A
    144   %exitcond = icmp eq i32 %10, %count
    145   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    146 ._crit_edge:
    147   ret void
    148 }
    149 
    150 
    151 ; CHECK-LABEL: merge_loads_i16:
        ; Two adjacent i8 loads from %q feed two adjacent i8 stores into the current
        ; %struct.A; the checks expect one 16-bit load and one 16-bit store.
    152 ;  load:
    153 ; CHECK: movw
    154 ;  store:
    155 ; CHECK: movw
    156 ; CHECK: ret
    157 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
    158   %1 = icmp sgt i32 %count, 0
    159   br i1 %1, label %.lr.ph, label %._crit_edge
    160 
    161 .lr.ph:                                           ; preds = %0
    162   %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0  ; source byte 0 (loop invariant)
    163   %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1  ; source byte 1 (loop invariant)
    164   br label %4
    165 
    166 ; <label>:4                                       ; preds = %4, %.lr.ph
    167   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]  ; iteration counter
    168   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]  ; current destination struct
    169   %5 = load i8, i8* %2, align 1
    170   %6 = load i8, i8* %3, align 1
    171   %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
    172   store i8 %5, i8* %7, align 1
    173   %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
    174   store i8 %6, i8* %8, align 1
    175   %9 = add nsw i32 %i.02, 1
    176   %10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1  ; advance to the next %struct.A
    177   %exitcond = icmp eq i32 %9, %count
    178   br i1 %exitcond, label %._crit_edge, label %4
    179 
    180 ._crit_edge:                                      ; preds = %4, %0
    181   ret void
    182 }
    183 
    184 ; The loads and the stores are interleaved (load, store, load, store), so they
        ; can't be merged; the checks expect four separate byte moves.
    185 ; CHECK-LABEL: no_merge_loads:
    186 ; CHECK: movb
    187 ; CHECK: movb
    188 ; CHECK: movb
    189 ; CHECK: movb
    190 ; CHECK: ret
    191 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
    192   %1 = icmp sgt i32 %count, 0
    193   br i1 %1, label %.lr.ph, label %._crit_edge
    194 
    195 .lr.ph:                                           ; preds = %0
    196   %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0  ; source byte 0 (loop invariant)
    197   %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1  ; source byte 1 (loop invariant)
    198   br label %a4
    199 
    200 a4:                                       ; preds = %a4, %.lr.ph
    201   %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]  ; iteration counter
    202   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]  ; current destination struct
    203   %a5 = load i8, i8* %2, align 1
    204   %a7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
    205   store i8 %a5, i8* %a7, align 1
    206   %a8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
    207   %a6 = load i8, i8* %3, align 1  ; second load comes after the first store
    208   store i8 %a6, i8* %a8, align 1
    209   %a9 = add nsw i32 %i.02, 1
    210   %a10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1  ; advance to the next %struct.A
    211   %exitcond = icmp eq i32 %a9, %count
    212   br i1 %exitcond, label %._crit_edge, label %a4
    213 
    214 ._crit_edge:                                      ; preds = %a4, %0
    215   ret void
    216 }
    217 
    218 
    219 ; CHECK-LABEL: merge_loads_integer:
        ; Two adjacent i32 loads from %q feed two adjacent i32 stores into the current
        ; %struct.B; the checks expect one 64-bit load and one 64-bit store.
    220 ;  load:
    221 ; CHECK: movq
    222 ;  store:
    223 ; CHECK: movq
    224 ; CHECK: ret
    225 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    226   %1 = icmp sgt i32 %count, 0
    227   br i1 %1, label %.lr.ph, label %._crit_edge
    228 
    229 .lr.ph:                                           ; preds = %0
    230   %2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0  ; source field 0 (loop invariant)
    231   %3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1  ; source field 1 (loop invariant)
    232   br label %4
    233 
    234 ; <label>:4                                       ; preds = %4, %.lr.ph
    235   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]  ; iteration counter
    236   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]  ; current destination struct
    237   %5 = load i32, i32* %2
    238   %6 = load i32, i32* %3
    239   %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
    240   store i32 %5, i32* %7
    241   %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
    242   store i32 %6, i32* %8
    243   %9 = add nsw i32 %i.02, 1
    244   %10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1  ; advance to the next %struct.B
    245   %exitcond = icmp eq i32 %9, %count
    246   br i1 %exitcond, label %._crit_edge, label %4
    247 
    248 ._crit_edge:                                      ; preds = %4, %0
    249   ret void
    250 }
    251 
    252 
    253 ; CHECK-LABEL: merge_loads_vector:
        ; Four adjacent i32 loads from %q feed four adjacent i32 stores into the
        ; current %struct.B; the checks expect one unaligned vector load and one
        ; unaligned vector store.
    254 ;  load:
    255 ; CHECK: movups
    256 ;  store:
    257 ; CHECK: movups
    258 ; CHECK: ret
    259 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    260   %a1 = icmp sgt i32 %count, 0
    261   br i1 %a1, label %.lr.ph, label %._crit_edge
    262 
    263 .lr.ph:                                           ; preds = %0
    264   %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0  ; source fields 0..3 (loop invariant)
    265   %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
    266   %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
    267   %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
    268   br label %block4
    269 
    270 block4:                                       ; preds = %block4, %.lr.ph
    271   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]  ; iteration counter
    272   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]  ; current destination struct
    273   %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0  ; destination fields 0..3
    274   %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
    275   %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
    276   %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
    277   %b1 = load i32, i32* %a2
    278   %b2 = load i32, i32* %a3
    279   %b3 = load i32, i32* %a4
    280   %b4 = load i32, i32* %a5
    281   store i32 %b1, i32* %a7
    282   store i32 %b2, i32* %a8
    283   store i32 %b3, i32* %a9
    284   store i32 %b4, i32* %a10
    285   %c9 = add nsw i32 %i.02, 1
    286   %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1  ; advance to the next %struct.B
    287   %exitcond = icmp eq i32 %c9, %count
    288   br i1 %exitcond, label %._crit_edge, label %block4
    289 
    290 ._crit_edge:                                      ; preds = %block4, %0
    291   ret void
    292 }
    293 
    294 ; CHECK-LABEL: merge_loads_no_align:
        ; Same shape as merge_loads_vector, but every load and store is only
        ; "align 1"; the checks expect four scalar 32-bit loads followed by four
        ; scalar 32-bit stores (no merged access).
    295 ;  load:
    296 ; CHECK: movl
    297 ; CHECK: movl
    298 ; CHECK: movl
    299 ; CHECK: movl
    300 ;  store:
    301 ; CHECK: movl
    302 ; CHECK: movl
    303 ; CHECK: movl
    304 ; CHECK: movl
    305 ; CHECK: ret
    306 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
    307   %a1 = icmp sgt i32 %count, 0
    308   br i1 %a1, label %.lr.ph, label %._crit_edge
    309 
    310 .lr.ph:                                           ; preds = %0
    311   %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0  ; source fields 0..3 (loop invariant)
    312   %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
    313   %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
    314   %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
    315   br label %block4
    316 
    317 block4:                                       ; preds = %block4, %.lr.ph
    318   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]  ; iteration counter
    319   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]  ; current destination struct
    320   %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0  ; destination fields 0..3
    321   %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
    322   %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
    323   %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
    324   %b1 = load i32, i32* %a2, align 1
    325   %b2 = load i32, i32* %a3, align 1
    326   %b3 = load i32, i32* %a4, align 1
    327   %b4 = load i32, i32* %a5, align 1
    328   store i32 %b1, i32* %a7, align 1
    329   store i32 %b2, i32* %a8, align 1
    330   store i32 %b3, i32* %a9, align 1
    331   store i32 %b4, i32* %a10, align 1
    332   %c9 = add nsw i32 %i.02, 1
    333   %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1  ; advance to the next %struct.B
    334   %exitcond = icmp eq i32 %c9, %count
    335   br i1 %exitcond, label %._crit_edge, label %block4
    336 
    337 ._crit_edge:                                      ; preds = %block4, %0
    338   ret void
    339 }
    340 
    341 ; Make sure that we merge the consecutive load/store sequence below and use a
    342 ; word (16 bit) instead of a byte copy.
        ; The two bytes are read from %c at index %3 and %3 + 1 (base + index
        ; addressing) and written to two consecutive bytes of the destination.
    343 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
    344 ; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
    345 ; CHECK: movw    [[REG]], (%{{.*}})
    346 define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
    347   br label %1
    348 
    349 ; <label>:1
    350   %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]  ; remaining iterations, counts down from %n
    351   %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]  ; destination cursor, advances 2 bytes per iteration
    352   %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]  ; cursor over the i64 index array
    353   %2 = getelementptr inbounds i64, i64* %.0, i64 1
    354   %3 = load i64, i64* %.0, align 1  ; index into %c
    355   %4 = getelementptr inbounds i8, i8* %c, i64 %3
    356   %5 = load i8, i8* %4, align 1
    357   %6 = add i64 %3, 1
    358   %7 = getelementptr inbounds i8, i8* %c, i64 %6
    359   %8 = load i8, i8* %7, align 1
    360   store i8 %5, i8* %.08, align 1
    361   %9 = getelementptr inbounds i8, i8* %.08, i64 1
    362   store i8 %8, i8* %9, align 1
    363   %10 = getelementptr inbounds i8, i8* %.08, i64 2
    364   %11 = add nsw i32 %.09, -1
    365   %12 = icmp eq i32 %11, 0
    366   br i1 %12, label %13, label %1
    367 
    368 ; <label>:13
    369   ret void
    370 }
    371 
    372 ; Make sure that we merge the consecutive load/store sequence below and use a
    373 ; word (16 bit) instead of a byte copy even if there are intermediate sign
    374 ; extensions.
        ; The i8 index is sign-extended to i64 first and the + 1 is done in i64, so
        ; both addresses are derived from the same extended value.
    375 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
    376 ; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
    377 ; CHECK: movw    [[REG]], (%{{.*}})
    378 define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
    379   br label %1
    380 
    381 ; <label>:1
    382   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]  ; remaining iterations, counts down from %n
    383   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]  ; destination cursor, advances 2 bytes per iteration
    384   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]  ; cursor over the i8 index array
    385   %2 = getelementptr inbounds i8, i8* %.0, i64 1
    386   %3 = load i8, i8* %.0, align 1
    387   %4 = sext i8 %3 to i64  ; sign-extended index into %c
    388   %5 = getelementptr inbounds i8, i8* %c, i64 %4
    389   %6 = load i8, i8* %5, align 1
    390   %7 = add i64 %4, 1  ; next index, computed after the extension
    391   %8 = getelementptr inbounds i8, i8* %c, i64 %7
    392   %9 = load i8, i8* %8, align 1
    393   store i8 %6, i8* %.08, align 1
    394   %10 = getelementptr inbounds i8, i8* %.08, i64 1
    395   store i8 %9, i8* %10, align 1
    396   %11 = getelementptr inbounds i8, i8* %.08, i64 2
    397   %12 = add nsw i32 %.09, -1
    398   %13 = icmp eq i32 %12, 0
    399   br i1 %13, label %14, label %1
    400 
    401 ; <label>:14
    402   ret void
    403 }
    404 
    405 ; However, we can only ignore sign extensions when they are applied to all of
    406 ; the memory address computations. Here the second index is sext(add i8 %3, 1),
        ; where the i8 add may wrap before the extension, so the two accesses are not
        ; provably adjacent and no 16-bit load or store may be emitted.
        ; The negative patterns are self-contained: they do not reuse a register
        ; captured while checking an earlier function.
    407 ; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
    408 ; CHECK-NOT: movw    (%{{.*}},%{{.*}}), %{{[a-z0-9]+}}
    409 ; CHECK-NOT: movw    %{{[a-z0-9]+}}, (%{{.*}})
    410 define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
    411   br label %1
    412 
    413 ; <label>:1
    414   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]  ; remaining iterations, counts down from %n
    415   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]  ; destination cursor, advances 2 bytes per iteration
    416   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]  ; cursor over the i8 index array
    417   %2 = getelementptr inbounds i8, i8* %.0, i64 1
    418   %3 = load i8, i8* %.0, align 1
    419   %4 = sext i8 %3 to i64  ; first index: extend, then use
    420   %5 = getelementptr inbounds i8, i8* %c, i64 %4
    421   %6 = load i8, i8* %5, align 1
    422   %7 = add i8 %3, 1  ; second index: add in i8 (may wrap) ...
    423   %wrap.4 = sext i8 %7 to i64  ; ... and only then extend
    424   %8 = getelementptr inbounds i8, i8* %c, i64 %wrap.4
    425   %9 = load i8, i8* %8, align 1
    426   store i8 %6, i8* %.08, align 1
    427   %10 = getelementptr inbounds i8, i8* %.08, i64 1
    428   store i8 %9, i8* %10, align 1
    429   %11 = getelementptr inbounds i8, i8* %.08, i64 2
    430   %12 = add nsw i32 %.09, -1
    431   %13 = icmp eq i32 %12, 0
    432   br i1 %13, label %14, label %1
    433 
    434 ; <label>:14
    435   ret void
    436 }
    437 
    438 ; PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 )
        ; All eight elements of one <8 x float> are extracted and stored, in order, to
        ; eight consecutive float slots starting at %ptr. The checks at the end of the
        ; function expect a single vmovups followed directly by vzeroupper and retq.
    439 define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
    440   %vecext0 = extractelement <8 x float> %v, i32 0
    441   %vecext1 = extractelement <8 x float> %v, i32 1
    442   %vecext2 = extractelement <8 x float> %v, i32 2
    443   %vecext3 = extractelement <8 x float> %v, i32 3
    444   %vecext4 = extractelement <8 x float> %v, i32 4
    445   %vecext5 = extractelement <8 x float> %v, i32 5
    446   %vecext6 = extractelement <8 x float> %v, i32 6
    447   %vecext7 = extractelement <8 x float> %v, i32 7
    448   %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
    449   %arrayidx2 = getelementptr inbounds float, float* %ptr, i64 2
    450   %arrayidx3 = getelementptr inbounds float, float* %ptr, i64 3
    451   %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 4
    452   %arrayidx5 = getelementptr inbounds float, float* %ptr, i64 5
    453   %arrayidx6 = getelementptr inbounds float, float* %ptr, i64 6
    454   %arrayidx7 = getelementptr inbounds float, float* %ptr, i64 7
    455   store float %vecext0, float* %ptr, align 4  ; element i goes to %ptr[i]
    456   store float %vecext1, float* %arrayidx1, align 4
    457   store float %vecext2, float* %arrayidx2, align 4
    458   store float %vecext3, float* %arrayidx3, align 4
    459   store float %vecext4, float* %arrayidx4, align 4
    460   store float %vecext5, float* %arrayidx5, align 4
    461   store float %vecext6, float* %arrayidx6, align 4
    462   store float %vecext7, float* %arrayidx7, align 4
    463   ret void
    464 
    465 ; CHECK-LABEL: merge_vec_element_store
    466 ; CHECK: vmovups
    467 ; CHECK-NEXT: vzeroupper
    468 ; CHECK-NEXT: retq
    469 }
    470 
    471 ; This is a minimized test based on real code that was failing.
    472 ; We could merge stores (and loads) like this...
        ; The checks at the end of the function pin the unmerged form instead: two
        ; separate 64-bit load/store pairs (array[0] -> array[4], array[1] -> array[5]).
    473 
    474 define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
    475   %idx0 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 0
    476   %idx1 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 1
    477   %idx4 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 4
    478   %idx5 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 5
    479 
    480   %a0 = load i64, i64* %idx0, align 8  ; scalar load of array[0]
    481   store i64 %a0, i64* %idx4, align 8
    482 
    483   %b = bitcast i64* %idx1 to <2 x i64>*
    484   %v = load <2 x i64>, <2 x i64>* %b, align 8  ; vector load starting at array[1]
    485   %a1 = extractelement <2 x i64> %v, i32 0  ; only lane 0 (array[1]) is used
    486   store i64 %a1, i64* %idx5, align 8
    487   ret void
    488 
    489 ; CHECK-LABEL: merge_vec_element_and_scalar_load
    490 ; CHECK:      movq	(%rdi), %rax
    491 ; CHECK-NEXT: movq	%rax, 32(%rdi)
    492 ; CHECK-NEXT: movq	8(%rdi), %rax
    493 ; CHECK-NEXT: movq	%rax, 40(%rdi)
    494 ; CHECK-NEXT: retq
    495 }
    496