Home | History | Annotate | Download | only in LoopVectorize
      1 ; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=2 -force-vector-interleave=1 -enable-interleaved-mem-accesses -vectorize-num-stores-pred=1 -enable-cond-stores-vec < %s | FileCheck %s
      2 
      3 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
      4 %pair = type { i64, i64 }
      5 
      6 ; Ensure that we vectorize the interleaved load group even though the loop
      7 ; contains a conditional store. The store group contains gaps and is not
      8 ; vectorized.
      9 ;
     10 ; CHECK-LABEL: @interleaved_with_cond_store_0(
     11 ;
     12 ; CHECK: min.iters.checked
     13 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 1
     14 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
     15 ; CHECK:   %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
     16 ; CHECK:   %n.vec = sub i64 %[[N]], %[[R]]
     17 ;
     18 ; CHECK: vector.body:
     19 ; CHECK:   %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}}
     20 ; CHECK:   %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
     21 ;
     22 ; CHECK: pred.store.if
     23 ; CHECK:   %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
     24 ; CHECK:   store i64 %[[X1]], {{.*}}
     25 ;
     26 ; CHECK: pred.store.if
     27 ; CHECK:   %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
     28 ; CHECK:   store i64 %[[X2]], {{.*}}
     29 
     30 define void @interleaved_with_cond_store_0(%pair *%p, i64 %x, i64 %n) { ; walks the %pair array at %p; rewrites field 1 with its own value whenever it equals %x
     31 entry:
     32   br label %for.body  ; unconditional entry: the body runs once before %n is ever tested
     33 
     34 for.body:
     35   %i  = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]  ; element index: 0 on entry, %i.next on the back edge
     36   %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1  ; address of field 1 of element %i
     37   %0 = load i64, i64* %p.1, align 8  ; the only load in the loop; touches field 1 only (field 0 is never accessed)
     38   %1 = icmp eq i64 %0, %x
     39   br i1 %1, label %if.then, label %if.merge  ; the store below executes only when field 1 == %x
     40 
     41 if.then:
     42   store i64 %0, i64* %p.1, align 8  ; conditional store of the loaded value back to the same field-1 address
     43   br label %if.merge
     44 
     45 if.merge:
     46   %i.next = add nuw nsw i64 %i, 1
     47   %cond = icmp slt i64 %i.next, %n  ; signed compare: keep looping while %i.next < %n
     48   br i1 %cond, label %for.body, label %for.end
     49 
     50 for.end:
     51   ret void
     52 }
     53 
     54 ; Ensure that we don't form a single interleaved group for the two loads. The
     55 ; conditional store prevents the second load from being hoisted. The two load
     56 ; groups are separately vectorized. The store group contains gaps and is not
     57 ; vectorized.
     58 ;
     59 ; CHECK-LABEL: @interleaved_with_cond_store_1(
     60 ;
     61 ; CHECK: min.iters.checked
     62 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 1
     63 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
     64 ; CHECK:   %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
     65 ; CHECK:   %n.vec = sub i64 %[[N]], %[[R]]
     66 ;
     67 ; CHECK: vector.body:
     68 ; CHECK:   %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
     69 ; CHECK:   %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
     70 ;
     71 ; CHECK: pred.store.if
     72 ; CHECK:   %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
     73 ; CHECK:   store i64 %[[X1]], {{.*}}
     74 ;
     75 ; CHECK: pred.store.if
     76 ; CHECK:   %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
     77 ; CHECK:   store i64 %[[X2]], {{.*}}
     78 ;
     79 ; CHECK: pred.store.continue
     80 ; CHECK:   %[[L2:.+]] = load <4 x i64>, <4 x i64>* {{.*}}
     81 ; CHECK:   %[[X3:.+]] = extractelement <4 x i64> %[[L2]], i32 0
     82 ; CHECK:   store i64 %[[X3]], {{.*}}
     83 ; CHECK:   %[[X4:.+]] = extractelement <4 x i64> %[[L2]], i32 2
     84 ; CHECK:   store i64 %[[X4]], {{.*}}
     85 
     86 define void @interleaved_with_cond_store_1(%pair *%p, i64 %x, i64 %n) { ; per element: if field 1 equals %x copy it into field 0, then always copy field 0 into field 1
     87 entry:
     88   br label %for.body  ; unconditional entry: the body runs once before %n is ever tested
     89 
     90 for.body:
     91   %i  = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]  ; element index: 0 on entry, %i.next on the back edge
     92   %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0  ; address of field 0 of element %i
     93   %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1  ; address of field 1 of element %i
     94   %0 = load i64, i64* %p.1, align 8  ; first load: field 1, issued before the branch
     95   %1 = icmp eq i64 %0, %x
     96   br i1 %1, label %if.then, label %if.merge  ; the store in if.then executes only when field 1 == %x
     97 
     98 if.then:
     99   store i64 %0, i64* %p.0, align 8  ; conditional store: writes the field-1 value into field 0
    100   br label %if.merge
    101 
    102 if.merge:
    103   %2 = load i64, i64* %p.0, align 8  ; second load: field 0, read after the conditional store that may have just written it
    104   store i64 %2, i64 *%p.1, align 8  ; unconditional store of the field-0 value into field 1
    105   %i.next = add nuw nsw i64 %i, 1
    106   %cond = icmp slt i64 %i.next, %n  ; signed compare: keep looping while %i.next < %n
    107   br i1 %cond, label %for.body, label %for.end
    108 
    109 for.end:
    110   ret void
    111 }
    112 
    113 ; Ensure that we don't create a single interleaved group for the two stores.
    114 ; The second store is conditional and we can't sink the first store inside the
    115 ; predicated block. The load group is vectorized, and the store groups contain
    116 ; gaps and are not vectorized.
    117 ;
    118 ; CHECK-LABEL: @interleaved_with_cond_store_2(
    119 ;
    120 ; CHECK: min.iters.checked
    121 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 1
    122 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
    123 ; CHECK:   %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
    124 ; CHECK:   %n.vec = sub i64 %[[N]], %[[R]]
    125 ;
    126 ; CHECK: vector.body:
    127 ; CHECK:   %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
    128 ; CHECK:   %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
    129 ; CHECK:   store i64 %x, {{.*}}
    130 ; CHECK:   store i64 %x, {{.*}}
    131 ;
    132 ; CHECK: pred.store.if
    133 ; CHECK:   %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
    134 ; CHECK:   store i64 %[[X1]], {{.*}}
    135 ;
    136 ; CHECK: pred.store.if
    137 ; CHECK:   %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
    138 ; CHECK:   store i64 %[[X2]], {{.*}}
    139 
    140 define void @interleaved_with_cond_store_2(%pair *%p, i64 %x, i64 %n) { ; per element: always write %x to field 0; rewrite field 1 with its own value only when it equals %x
    141 entry:
    142   br label %for.body  ; unconditional entry: the body runs once before %n is ever tested
    143 
    144 for.body:
    145   %i  = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]  ; element index: 0 on entry, %i.next on the back edge
    146   %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0  ; address of field 0 of element %i
    147   %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1  ; address of field 1 of element %i
    148   %0 = load i64, i64* %p.1, align 8  ; the only load in the loop: field 1
    149   store i64 %x, i64* %p.0, align 8  ; first store: unconditional, writes %x to field 0 before the branch
    150   %1 = icmp eq i64 %0, %x
    151   br i1 %1, label %if.then, label %if.merge  ; the store in if.then executes only when field 1 == %x
    152 
    153 if.then:
    154   store i64 %0, i64* %p.1, align 8  ; second store: conditional, writes the loaded value back to field 1
    155   br label %if.merge
    156 
    157 if.merge:
    158   %i.next = add nuw nsw i64 %i, 1
    159   %cond = icmp slt i64 %i.next, %n  ; signed compare: keep looping while %i.next < %n
    160   br i1 %cond, label %for.body, label %for.end
    161 
    162 for.end:
    163   ret void
    164 }
    165