Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s
      2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=0 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWOFF %s
      3 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s
      4 
      5 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }          ; eight i8 fields
      6 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }  ; eight i32 fields
      7 
      8 ; CHECK-LABEL: merge_const_store:
      9 ; Save the bytes 1,2,3,...,8 as one 64-bit integer (0x0807060504030201).
     10 ; CHECK: movabsq $578437695752307201
     11 ; CHECK: ret
     12 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
          ; Loop body stores the constants 1..8 into the eight consecutive i8
          ; fields of the current %struct.A, then advances to the next struct.
     13   %1 = icmp sgt i32 %count, 0
     14   br i1 %1, label %.lr.ph, label %._crit_edge   ; skip the loop when %count <= 0
     15 .lr.ph:
     16   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]   ; iteration counter, starts at 0
     17   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]   ; struct written this iteration
     18   %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
     19   store i8 1, i8* %2, align 1
     20   %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
     21   store i8 2, i8* %3, align 1
     22   %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
     23   store i8 3, i8* %4, align 1
     24   %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
     25   store i8 4, i8* %5, align 1
     26   %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
     27   store i8 5, i8* %6, align 1
     28   %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
     29   store i8 6, i8* %7, align 1
     30   %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
     31   store i8 7, i8* %8, align 1
     32   %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
     33   store i8 8, i8* %9, align 1
     34   %10 = add nsw i32 %i.02, 1
     35   %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1   ; pointer to the next struct
     36   %exitcond = icmp eq i32 %10, %count
     37   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     38 ._crit_edge:
     39   ret void
     40 }
     41 
     42 ; No vectors because we use noimplicitfloat
     43 ; CHECK-LABEL: merge_const_store_no_vec:
     44 ; CHECK-NOT: vmovups
     45 ; CHECK: ret
     46 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
          ; Loop body stores zero into all eight i32 fields of the current
          ; %struct.B. The function is marked noimplicitfloat.
     47   %1 = icmp sgt i32 %count, 0
     48   br i1 %1, label %.lr.ph, label %._crit_edge   ; skip the loop when %count <= 0
     49 .lr.ph:
     50   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]   ; iteration counter, starts at 0
     51   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]   ; struct written this iteration
     52   %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
     53   store i32 0, i32* %2, align 4
     54   %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
     55   store i32 0, i32* %3, align 4
     56   %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
     57   store i32 0, i32* %4, align 4
     58   %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
     59   store i32 0, i32* %5, align 4
     60   %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
     61   store i32 0, i32* %6, align 4
     62   %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
     63   store i32 0, i32* %7, align 4
     64   %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
     65   store i32 0, i32* %8, align 4
     66   %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
     67   store i32 0, i32* %9, align 4
     68   %10 = add nsw i32 %i.02, 1
     69   %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1   ; pointer to the next struct
     70   %exitcond = icmp eq i32 %10, %count
     71   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     72 ._crit_edge:
     73   ret void
     74 }
     75 
     76 ; Move the constants using a single vector store.
     77 ; CHECK-LABEL: merge_const_store_vec:
     78 ; CHECK: vmovups
     79 ; CHECK: ret
     80 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
          ; Loop body stores zero into all eight i32 fields of the current
          ; %struct.B. This function does not carry noimplicitfloat.
     81   %1 = icmp sgt i32 %count, 0
     82   br i1 %1, label %.lr.ph, label %._crit_edge   ; skip the loop when %count <= 0
     83 .lr.ph:
     84   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]   ; iteration counter, starts at 0
     85   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]   ; struct written this iteration
     86   %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
     87   store i32 0, i32* %2, align 4
     88   %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
     89   store i32 0, i32* %3, align 4
     90   %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
     91   store i32 0, i32* %4, align 4
     92   %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
     93   store i32 0, i32* %5, align 4
     94   %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
     95   store i32 0, i32* %6, align 4
     96   %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
     97   store i32 0, i32* %7, align 4
     98   %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
     99   store i32 0, i32* %8, align 4
    100   %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
    101   store i32 0, i32* %9, align 4
    102   %10 = add nsw i32 %i.02, 1
    103   %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1   ; pointer to the next struct
    104   %exitcond = icmp eq i32 %10, %count
    105   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    106 ._crit_edge:
    107   ret void
    108 }
    109 
    110 ; Move the first 4 constants as a single 32-bit integer store (0x04030201). Move the rest as byte stores.
    111 ; CHECK-LABEL: merge_nonconst_store:
    112 ; CHECK: movl $67305985
    113 ; CHECK: movb
    114 ; CHECK: movb
    115 ; CHECK: movb
    116 ; CHECK: movb
    117 ; CHECK: ret
    118 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
          ; Loop body writes all eight i8 fields of the current %struct.A.
          ; Fields 0-3 and 5-7 receive constants; field 4 receives the
          ; argument %zz, which splits the run of constant stores.
    119   %1 = icmp sgt i32 %count, 0
    120   br i1 %1, label %.lr.ph, label %._crit_edge   ; skip the loop when %count <= 0
    121 .lr.ph:
    122   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]   ; iteration counter, starts at 0
    123   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]   ; struct written this iteration
    124   %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
    125   store i8 1, i8* %2, align 1
    126   %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
    127   store i8 2, i8* %3, align 1
    128   %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
    129   store i8 3, i8* %4, align 1
    130   %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
    131   store i8 4, i8* %5, align 1
    132   %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
    133   store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
    134   %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
    135   store i8 6, i8* %7, align 1
    136   %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
    137   store i8 7, i8* %8, align 1
    138   %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
    139   store i8 8, i8* %9, align 1
    140   %10 = add nsw i32 %i.02, 1
    141   %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1   ; pointer to the next struct
    142   %exitcond = icmp eq i32 %10, %count
    143   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    144 ._crit_edge:
    145   ret void
    146 }
    147 
    148 
    149 ; CHECK-LABEL: merge_loads_i16:
    150 ;  load:
    151 ; BWON:  movzwl
    152 ; BWOFF: movw
    153 ;  store:
    154 ; CHECK: movw
    155 ; CHECK: ret
    156 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
          ; Loop body loads fields 0 and 1 (two adjacent bytes) of %q and then
          ; stores them to fields 0 and 1 of the current %p struct. Both loads
          ; precede both stores, and %q / %p are declared noalias.
    157   %1 = icmp sgt i32 %count, 0
    158   br i1 %1, label %.lr.ph, label %._crit_edge   ; skip the loop when %count <= 0
    159 
    160 .lr.ph:                                           ; preds = %0
    161   %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0   ; source byte 0 (fixed for all iterations)
    162   %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1   ; source byte 1 (fixed for all iterations)
    163   br label %4
    164 
    165 ; <label>:4                                       ; preds = %4, %.lr.ph
    166   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]   ; iteration counter
    167   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]   ; destination struct for this iteration
    168   %5 = load i8, i8* %2, align 1
    169   %6 = load i8, i8* %3, align 1
    170   %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
    171   store i8 %5, i8* %7, align 1
    172   %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
    173   store i8 %6, i8* %8, align 1
    174   %9 = add nsw i32 %i.02, 1
    175   %10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1   ; pointer to the next destination struct
    176   %exitcond = icmp eq i32 %9, %count
    177   br i1 %exitcond, label %._crit_edge, label %4
    178 
    179 ._crit_edge:                                      ; preds = %4, %0
    180   ret void
    181 }
    182 
    183 ; The loads and the stores are interleaved. Can't merge them.
    184 ; CHECK-LABEL: no_merge_loads:
    185 ; BWON:  movzbl
    186 ; BWOFF: movb
    187 ; CHECK: movb
    188 ; BWON:  movzbl
    189 ; BWOFF: movb
    190 ; CHECK: movb
    191 ; CHECK: ret
    192 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
          ; Loop body copies fields 0 and 1 of %q to fields 0 and 1 of the
          ; current %p struct, but in the order load, store, load, store:
          ; the second load comes after the first store.
    193   %1 = icmp sgt i32 %count, 0
    194   br i1 %1, label %.lr.ph, label %._crit_edge   ; skip the loop when %count <= 0
    195 
    196 .lr.ph:                                           ; preds = %0
    197   %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0   ; source byte 0 (fixed for all iterations)
    198   %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1   ; source byte 1 (fixed for all iterations)
    199   br label %a4
    200 
    201 a4:                                       ; preds = %a4, %.lr.ph
    202   %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]   ; iteration counter
    203   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]   ; destination struct for this iteration
    204   %a5 = load i8, i8* %2, align 1
    205   %a7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
    206   store i8 %a5, i8* %a7, align 1
    207   %a8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
    208   %a6 = load i8, i8* %3, align 1                  ; second load is issued after the first store
    209   store i8 %a6, i8* %a8, align 1
    210   %a9 = add nsw i32 %i.02, 1
    211   %a10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1   ; pointer to the next destination struct
    212   %exitcond = icmp eq i32 %a9, %count
    213   br i1 %exitcond, label %._crit_edge, label %a4
    214 
    215 ._crit_edge:                                      ; preds = %a4, %0
    216   ret void
    217 }
    218 
    219 
    220 ; CHECK-LABEL: merge_loads_integer:
    221 ;  load:
    222 ; CHECK: movq
    223 ;  store:
    224 ; CHECK: movq
    225 ; CHECK: ret
    226 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
          ; Loop body loads fields 0 and 1 (two adjacent i32 values) of %q and
          ; then stores them to fields 0 and 1 of the current %p struct. The
          ; loads and stores carry no explicit alignment.
    227   %1 = icmp sgt i32 %count, 0
    228   br i1 %1, label %.lr.ph, label %._crit_edge   ; skip the loop when %count <= 0
    229 
    230 .lr.ph:                                           ; preds = %0
    231   %2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0   ; source element 0 (fixed for all iterations)
    232   %3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1   ; source element 1 (fixed for all iterations)
    233   br label %4
    234 
    235 ; <label>:4                                       ; preds = %4, %.lr.ph
    236   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]   ; iteration counter
    237   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]   ; destination struct for this iteration
    238   %5 = load i32, i32* %2
    239   %6 = load i32, i32* %3
    240   %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
    241   store i32 %5, i32* %7
    242   %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
    243   store i32 %6, i32* %8
    244   %9 = add nsw i32 %i.02, 1
    245   %10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1   ; pointer to the next destination struct
    246   %exitcond = icmp eq i32 %9, %count
    247   br i1 %exitcond, label %._crit_edge, label %4
    248 
    249 ._crit_edge:                                      ; preds = %4, %0
    250   ret void
    251 }
    252 
    253 
    254 ; CHECK-LABEL: merge_loads_vector:
    255 ;  load:
    256 ; CHECK: movups
    257 ;  store:
    258 ; CHECK: movups
    259 ; CHECK: ret
    260 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
          ; Loop body loads fields 0-3 (four adjacent i32 values) of %q and
          ; then stores them to fields 0-3 of the current %p struct. All four
          ; loads precede all four stores; no explicit alignment is given.
    261   %a1 = icmp sgt i32 %count, 0
    262   br i1 %a1, label %.lr.ph, label %._crit_edge   ; skip the loop when %count <= 0
    263 
    264 .lr.ph:                                           ; preds = %0
    265   %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
    266   %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
    267   %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
    268   %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
    269   br label %block4
    270 
    271 block4:                                       ; preds = %block4, %.lr.ph
    272   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]   ; iteration counter
    273   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]   ; destination struct for this iteration
    274   %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
    275   %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
    276   %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
    277   %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
    278   %b1 = load i32, i32* %a2
    279   %b2 = load i32, i32* %a3
    280   %b3 = load i32, i32* %a4
    281   %b4 = load i32, i32* %a5
    282   store i32 %b1, i32* %a7
    283   store i32 %b2, i32* %a8
    284   store i32 %b3, i32* %a9
    285   store i32 %b4, i32* %a10
    286   %c9 = add nsw i32 %i.02, 1
    287   %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1   ; pointer to the next destination struct
    288   %exitcond = icmp eq i32 %c9, %count
    289   br i1 %exitcond, label %._crit_edge, label %block4
    290 
    291 ._crit_edge:                                      ; preds = %block4, %0
    292   ret void
    293 }
    294 
    295 ;; On x86, even unaligned copies should be merged to vector ops.
    296 ;; TODO: however, this cannot happen at the moment, due to brokenness
    297 ;; in MergeConsecutiveStores. See UseAA FIXME in DAGCombiner.cpp
    298 ;; visitSTORE.
    299 
    300 ; CHECK-LABEL: merge_loads_no_align:
    301 ;  load:
    302 ; CHECK-NOT: vmovups ;; TODO
    303 ;  store:
    304 ; CHECK-NOT: vmovups ;; TODO
    305 ; CHECK: ret
    306 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
          ; Loop body loads fields 0-3 (four adjacent i32 values) of %q and
          ; then stores them to fields 0-3 of the current %p struct. Every
          ; load and store is explicitly marked "align 1".
    307   %a1 = icmp sgt i32 %count, 0
    308   br i1 %a1, label %.lr.ph, label %._crit_edge   ; skip the loop when %count <= 0
    309 
    310 .lr.ph:                                           ; preds = %0
    311   %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
    312   %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
    313   %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
    314   %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
    315   br label %block4
    316 
    317 block4:                                       ; preds = %block4, %.lr.ph
    318   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]   ; iteration counter
    319   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]   ; destination struct for this iteration
    320   %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
    321   %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
    322   %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
    323   %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
    324   %b1 = load i32, i32* %a2, align 1
    325   %b2 = load i32, i32* %a3, align 1
    326   %b3 = load i32, i32* %a4, align 1
    327   %b4 = load i32, i32* %a5, align 1
    328   store i32 %b1, i32* %a7, align 1
    329   store i32 %b2, i32* %a8, align 1
    330   store i32 %b3, i32* %a9, align 1
    331   store i32 %b4, i32* %a10, align 1
    332   %c9 = add nsw i32 %i.02, 1
    333   %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1   ; pointer to the next destination struct
    334   %exitcond = icmp eq i32 %c9, %count
    335   br i1 %exitcond, label %._crit_edge, label %block4
    336 
    337 ._crit_edge:                                      ; preds = %block4, %0
    338   ret void
    339 }
    340 
    341 ; Make sure that we merge the consecutive load/store sequence below and use a
    342 ; word (16 bit) instead of a byte copy.
    343 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
    344 ; BWON: movzwl   (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]]
    345 ; BWOFF: movw    (%{{.*}},%{{.*}}), %[[REG:[a-z]+]]
    346 ; CHECK: movw    %[[REG]], (%{{.*}})
    347 define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
          ; Each iteration reads an i64 index from the %a stream, copies the
          ; two bytes c[index] and c[index + 1] to the next two bytes of %b,
          ; and decrements the counter. The loop exits when the counter hits 0;
          ; the body always runs at least once because the exit test is at the end.
    348   br label %1
    349 
    350 ; <label>:1
    351   %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]   ; remaining iteration count
    352   %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]   ; destination cursor
    353   %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]   ; index-stream cursor
    354   %2 = getelementptr inbounds i64, i64* %.0, i64 1
    355   %3 = load i64, i64* %.0, align 1          ; index into %c
    356   %4 = getelementptr inbounds i8, i8* %c, i64 %3
    357   %5 = load i8, i8* %4, align 1             ; c[index]
    358   %6 = add i64 %3, 1
    359   %7 = getelementptr inbounds i8, i8* %c, i64 %6
    360   %8 = load i8, i8* %7, align 1             ; c[index + 1]
    361   store i8 %5, i8* %.08, align 1
    362   %9 = getelementptr inbounds i8, i8* %.08, i64 1
    363   store i8 %8, i8* %9, align 1
    364   %10 = getelementptr inbounds i8, i8* %.08, i64 2   ; advance destination by two bytes
    365   %11 = add nsw i32 %.09, -1
    366   %12 = icmp eq i32 %11, 0
    367   br i1 %12, label %13, label %1
    368 
    369 ; <label>:13
    370   ret void
    371 }
    372 
    373 ; Make sure that we merge the consecutive load/store sequence below and use a
    374 ; word (16 bit) instead of a byte copy even if there are intermediate sign
    375 ; extensions.
    376 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
    377 ; BWON: movzwl   (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]]
    378 ; BWOFF: movw    (%{{.*}},%{{.*}}), %[[REG:[a-z]+]]
    379 ; CHECK: movw    %[[REG]], (%{{.*}})
    380 define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
          ; Each iteration reads an i8 index from the %a stream, sign-extends it
          ; to i64, copies the two bytes c[index] and c[index + 1] to the next
          ; two bytes of %b, and decrements the counter. Both source addresses
          ; are derived from the same sign-extended value (%4); the +1 is done
          ; in i64 after the extension.
    381   br label %1
    382 
    383 ; <label>:1
    384   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]   ; remaining iteration count
    385   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]   ; destination cursor
    386   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]   ; index-stream cursor
    387   %2 = getelementptr inbounds i8, i8* %.0, i64 1
    388   %3 = load i8, i8* %.0, align 1
    389   %4 = sext i8 %3 to i64                    ; sign-extended index into %c
    390   %5 = getelementptr inbounds i8, i8* %c, i64 %4
    391   %6 = load i8, i8* %5, align 1             ; c[index]
    392   %7 = add i64 %4, 1                        ; index + 1, computed in i64
    393   %8 = getelementptr inbounds i8, i8* %c, i64 %7
    394   %9 = load i8, i8* %8, align 1             ; c[index + 1]
    395   store i8 %6, i8* %.08, align 1
    396   %10 = getelementptr inbounds i8, i8* %.08, i64 1
    397   store i8 %9, i8* %10, align 1
    398   %11 = getelementptr inbounds i8, i8* %.08, i64 2   ; advance destination by two bytes
    399   %12 = add nsw i32 %.09, -1
    400   %13 = icmp eq i32 %12, 0
    401   br i1 %13, label %14, label %1
    402 
    403 ; <label>:14
    404   ret void
    405 }
    406 
    407 ; However, we can only ignore sign extensions when merging if they are applied
    408 ; to all of the memory address computations.
    409 ; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
    410 ; CHECK-NOT: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
    411 ; CHECK-NOT: movw    [[REG]], (%{{.*}})
    412 define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
          ; Each iteration reads an i8 index from the %a stream and copies two
          ; bytes from %c to the next two bytes of %b. Unlike a plain
          ; "index, index + 1" pair, the second index is formed by adding 1 in
          ; i8 and only then sign-extending, so the addition can wrap
          ; (127 + 1 -> -128) and the two source addresses need not be adjacent.
    413   br label %1
    414 
    415 ; <label>:1
    416   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]   ; remaining iteration count
    417   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]   ; destination cursor
    418   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]   ; index-stream cursor
    419   %2 = getelementptr inbounds i8, i8* %.0, i64 1
    420   %3 = load i8, i8* %.0, align 1
    421   %4 = sext i8 %3 to i64                    ; first index: sext(idx)
    422   %5 = getelementptr inbounds i8, i8* %c, i64 %4
    423   %6 = load i8, i8* %5, align 1
    424   %7 = add i8 %3, 1                         ; +1 performed in i8 (may wrap)
    425   %wrap.4 = sext i8 %7 to i64               ; second index: sext(idx + 1)
    426   %8 = getelementptr inbounds i8, i8* %c, i64 %wrap.4
    427   %9 = load i8, i8* %8, align 1
    428   store i8 %6, i8* %.08, align 1
    429   %10 = getelementptr inbounds i8, i8* %.08, i64 1
    430   store i8 %9, i8* %10, align 1
    431   %11 = getelementptr inbounds i8, i8* %.08, i64 2   ; advance destination by two bytes
    432   %12 = add nsw i32 %.09, -1
    433   %13 = icmp eq i32 %12, 0
    434   br i1 %13, label %14, label %1
    435 
    436 ; <label>:14
    437   ret void
    438 }
    439 
    440 ; PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 )
    441 define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
          ; Extracts all eight elements of %v and stores element i to %ptr[i],
          ; i.e. eight consecutive 4-byte-aligned float stores in index order.
    442   %vecext0 = extractelement <8 x float> %v, i32 0
    443   %vecext1 = extractelement <8 x float> %v, i32 1
    444   %vecext2 = extractelement <8 x float> %v, i32 2
    445   %vecext3 = extractelement <8 x float> %v, i32 3
    446   %vecext4 = extractelement <8 x float> %v, i32 4
    447   %vecext5 = extractelement <8 x float> %v, i32 5
    448   %vecext6 = extractelement <8 x float> %v, i32 6
    449   %vecext7 = extractelement <8 x float> %v, i32 7
    450   %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
    451   %arrayidx2 = getelementptr inbounds float, float* %ptr, i64 2
    452   %arrayidx3 = getelementptr inbounds float, float* %ptr, i64 3
    453   %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 4
    454   %arrayidx5 = getelementptr inbounds float, float* %ptr, i64 5
    455   %arrayidx6 = getelementptr inbounds float, float* %ptr, i64 6
    456   %arrayidx7 = getelementptr inbounds float, float* %ptr, i64 7
    457   store float %vecext0, float* %ptr, align 4
    458   store float %vecext1, float* %arrayidx1, align 4
    459   store float %vecext2, float* %arrayidx2, align 4
    460   store float %vecext3, float* %arrayidx3, align 4
    461   store float %vecext4, float* %arrayidx4, align 4
    462   store float %vecext5, float* %arrayidx5, align 4
    463   store float %vecext6, float* %arrayidx6, align 4
    464   store float %vecext7, float* %arrayidx7, align 4
    465   ret void
    466 
          ; Expected output: one vmovups immediately followed by vzeroupper and retq.
    467 ; CHECK-LABEL: merge_vec_element_store
    468 ; CHECK: vmovups
    469 ; CHECK-NEXT: vzeroupper
    470 ; CHECK-NEXT: retq
    471 }
    472 
    473 ; PR21711 - Merge vector stores into wider vector stores.
    474 ; These should be merged into 32-byte stores.
    475 define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) {
          ; Splits %v1 and %v2 into low and high <4 x float> halves and stores
          ; them, in order, to %ptr[3], %ptr[4], %ptr[5] and %ptr[6] with
          ; 16-byte alignment.
    476   %idx0 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
    477   %idx1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
    478   %idx2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5
    479   %idx3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 6
    480   %shuffle0 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>   ; low half of %v1
    481   %shuffle1 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>   ; high half of %v1
    482   %shuffle2 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>   ; low half of %v2
    483   %shuffle3 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>   ; high half of %v2
    484   store <4 x float> %shuffle0, <4 x float>* %idx0, align 16
    485   store <4 x float> %shuffle1, <4 x float>* %idx1, align 16
    486   store <4 x float> %shuffle2, <4 x float>* %idx2, align 16
    487   store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
    488   ret void
    489 
          ; Expected output: each source vector is written with one ymm store;
          ; byte offsets 48 and 80 correspond to %ptr[3] and %ptr[5].
    490 ; CHECK-LABEL: merge_vec_extract_stores
    491 ; CHECK:      vmovups %ymm0, 48(%rdi)
    492 ; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
    493 ; CHECK-NEXT: vzeroupper
    494 ; CHECK-NEXT: retq
    495 }
    496 
    497 ; Merging vector stores when sourced from vector loads is not currently handled.
    498 define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
          ; Loads two adjacent <4 x float> vectors from %v[0] and %v[1] and
          ; stores them to %ptr[0] and %ptr[1] with 16-byte alignment.
    499   %load_idx0 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 0
    500   %load_idx1 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 1
    501   %v0 = load <4 x float>, <4 x float>* %load_idx0
    502   %v1 = load <4 x float>, <4 x float>* %load_idx1
    503   %store_idx0 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 0
    504   %store_idx1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 1
    505   store <4 x float> %v0, <4 x float>* %store_idx0, align 16
    506   store <4 x float> %v1, <4 x float>* %store_idx1, align 16
    507   ret void
    508 
          ; Expected output: four separate vmovaps instructions (nothing merged).
    509 ; CHECK-LABEL: merge_vec_stores_from_loads
    510 ; CHECK:      vmovaps
    511 ; CHECK-NEXT: vmovaps
    512 ; CHECK-NEXT: vmovaps
    513 ; CHECK-NEXT: vmovaps
    514 ; CHECK-NEXT: retq
    515 }
    516 
    517 ; Merging vector stores when sourced from a constant vector is not currently handled. 
    518 define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
          ; Stores an all-zero <4 x i32> constant to the adjacent slots %ptr[3]
          ; and %ptr[4] with 16-byte alignment.
    519   %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
    520   %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
    521   store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16
    522   store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16
    523   ret void
    524 
          ; Expected output: one vxorps to materialize zero, then two separate
          ; vmovaps stores (nothing merged).
    525 ; CHECK-LABEL: merge_vec_stores_of_constants
    526 ; CHECK:      vxorps
    527 ; CHECK-NEXT: vmovaps
    528 ; CHECK-NEXT: vmovaps
    529 ; CHECK-NEXT: retq
    530 }
    531 
    532 ; This is a minimized test based on real code that was failing.
    533 ; We could merge stores (and loads) like this...
    534 
    535 define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
          ; Copies array[0] to array[4] with a scalar i64 load/store, then loads
          ; a <2 x i64> starting at array[1], extracts element 0 (the value at
          ; array[1]) and stores it to array[5].
    536   %idx0 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 0
    537   %idx1 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 1
    538   %idx4 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 4
    539   %idx5 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 5
    540 
    541   %a0 = load i64, i64* %idx0, align 8
    542   store i64 %a0, i64* %idx4, align 8
    543 
    544   %b = bitcast i64* %idx1 to <2 x i64>*           ; view array[1..2] as a 2-element vector
    545   %v = load <2 x i64>, <2 x i64>* %b, align 8
    546   %a1 = extractelement <2 x i64> %v, i32 0        ; only element 0 of the vector load is used
    547   store i64 %a1, i64* %idx5, align 8
    548   ret void
    549 
          ; Expected output: two independent 64-bit scalar load/store pairs.
    550 ; CHECK-LABEL: merge_vec_element_and_scalar_load
    551 ; CHECK:      movq	(%rdi), %rax
    552 ; CHECK-NEXT: movq	%rax, 32(%rdi)
    553 ; CHECK-NEXT: movq	8(%rdi), %rax
    554 ; CHECK-NEXT: movq	%rax, 40(%rdi)
    555 ; CHECK-NEXT: retq
    556 }
    557