; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=2 -force-vector-interleave=1 -enable-interleaved-mem-accesses -vectorize-num-stores-pred=1 -enable-cond-stores-vec < %s | FileCheck %s

; The command line above pins the vector width to 2 and the interleave count
; to 1. Together with the two-field %pair type below, that is why the expected
; wide loads in this file are <4 x i64>.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
%pair = type { i64, i64 }

; Ensure that we vectorize the interleaved load group even though the loop
; contains a conditional store. The store group contains gaps and is not
; vectorized.
;
; The loop reads field 1 of every %pair and, when the value equals %x, writes
; that same value back to field 1. Field 0 is never touched.
;
; The first group of patterns pins the vector trip count computation: a zero
; remainder (N & 1) is replaced by 2 before it is subtracted from N, so n.vec is
; always strictly smaller than N.
; NOTE(review): presumably this keeps a scalar remainder iteration because the
; wide load reads past the last accessed field -- confirm against the
; vectorizer's handling of interleaved groups with gaps.
;
; The remaining patterns expect one wide load, a stride-2 shuffle that gathers
; lanes 0 and 2, and one predicated scalar store per lane.
;
; CHECK-LABEL: @interleaved_with_cond_store_0(
;
; CHECK: min.iters.checked
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK: %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
; CHECK: store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
; CHECK: store i64 %[[X2]], {{.*}}

define void @interleaved_with_cond_store_0(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  ; Induction variable: starts at 0 and is advanced in %if.merge.
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  ; Address of field 1 of p[i].
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  ; The store in %if.then runs only when the loaded value equals %x.
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  ; Conditional store of the loaded value back to the field it came from.
  store i64 %0, i64* %p.1, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  ; Signed compare: keep looping while %i.next < %n.
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Ensure that we don't form a single interleaved group for the two loads. The
; conditional store prevents the second load from being hoisted.
; The two load groups are separately vectorized. The store group contains gaps
; and is not vectorized.
;
; The loop in @interleaved_with_cond_store_1 loads field 1 of p[i]; when it
; equals %x the value is stored to field 0. After the branch merges, field 0 is
; reloaded and copied into field 1 unconditionally.
;
; The first group of patterns pins the vector trip count computation: a zero
; remainder (N & 1) is replaced by 2 before it is subtracted from N, so n.vec is
; always strictly smaller than N.
;
; The remaining patterns expect a first wide load with a stride-2 shuffle, two
; predicated scalar stores, and then a second, separate wide load that appears
; only after the predicated-store blocks and feeds two scalar stores.
;
; CHECK-LABEL: @interleaved_with_cond_store_1(
;
; CHECK: min.iters.checked
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
; CHECK: store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
; CHECK: store i64 %[[X2]], {{.*}}
;
; CHECK: pred.store.continue
; CHECK: %[[L2:.+]] = load <4 x i64>, <4 x i64>* {{.*}}
; CHECK: %[[X3:.+]] = extractelement <4 x i64> %[[L2]], i32 0
; CHECK: store i64 %[[X3]], {{.*}}
; CHECK: %[[X4:.+]] = extractelement <4 x i64> %[[L2]], i32 2
; CHECK: store i64 %[[X4]], {{.*}}

define void @interleaved_with_cond_store_1(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  ; Induction variable: starts at 0 and is advanced in %if.merge.
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  ; Addresses of field 0 and field 1 of p[i].
  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  ; First load: field 1, read before the conditional store.
  %0 = load i64, i64* %p.1, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  ; Conditional store to field 0; it sits between the two loads.
  store i64 %0, i64* %p.0, align 8
  br label %if.merge

if.merge:
  ; Second load: field 0, read after the conditional store may have written it.
  %2 = load i64, i64* %p.0, align 8
  ; Unconditional copy of field 0 into field 1.
  store i64 %2, i64 *%p.1, align 8
  %i.next = add nuw nsw i64 %i, 1
  ; Signed compare: keep looping while %i.next < %n.
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Ensure that we don't create a single interleaved group for
; the two stores. The second store is conditional and we can't sink the first
; store inside the predicated block. The load group is vectorized, and the
; store groups contain gaps and are not vectorized.
;
; The loop in @interleaved_with_cond_store_2 loads field 1 of p[i], stores %x
; to field 0 unconditionally, and stores the loaded value back to field 1 only
; when it equals %x.
;
; The first group of patterns pins the vector trip count computation: a zero
; remainder (N & 1) is replaced by 2 before it is subtracted from N, so n.vec is
; always strictly smaller than N.
;
; The remaining patterns expect one wide load with a stride-2 shuffle, two
; unconditional scalar stores of %x in the vector body, and then one predicated
; scalar store per lane.
;
; CHECK-LABEL: @interleaved_with_cond_store_2(
;
; CHECK: min.iters.checked
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
; CHECK: store i64 %x, {{.*}}
; CHECK: store i64 %x, {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
; CHECK: store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
; CHECK: store i64 %[[X2]], {{.*}}

define void @interleaved_with_cond_store_2(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  ; Induction variable: starts at 0 and is advanced in %if.merge.
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  ; Addresses of field 0 and field 1 of p[i].
  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  ; First store: unconditional write of %x to field 0.
  store i64 %x, i64* %p.0, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  ; Second store: conditional write of the loaded value back to field 1.
  store i64 %0, i64* %p.1, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  ; Signed compare: keep looping while %i.next < %n.
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}