; RUN: opt -S < %s -basicaa -loop-vectorize -force-vector-interleave=1 2>&1 | FileCheck %s

; Regression test for the loop vectorizer on AArch64.
;
; Every function below contains a single counted loop (guarded by `len > 0`,
; exiting when the truncated induction variable equals `len`) that loads a
; narrow integer, widens it to i32, does some arithmetic, and stores the
; result. The FileCheck expectations in front of each function pin the vector
; element types and widths that must appear in the output of the run line
; above (interleaving is forced to 1 there).

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; add_a: q[i] = trunc_i8(zext_i32(p[i]) + 2), with i8 input and i8 output.
; The expectations require the load, add and store to be done on <16 x i8>.
; CHECK-LABEL: @add_a(
; CHECK: load <16 x i8>, <16 x i8>*
; CHECK: add nuw nsw <16 x i8>
; CHECK: store <16 x i8>
; Function Attrs: nounwind
define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
entry:
  %cmp8 = icmp sgt i32 %len, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx
  %conv = zext i8 %0 to i32
  %add = add nuw nsw i32 %conv, 2
  %conv1 = trunc i32 %add to i8
  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
  store i8 %conv1, i8* %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_b: same loop shape as add_a, but with i16 input and i16 output.
; The expectations require the load, add and store to be done on <8 x i16>.
; CHECK-LABEL: @add_b(
; CHECK: load <8 x i16>, <8 x i16>*
; CHECK: add nuw nsw <8 x i16>
; CHECK: store <8 x i16>
; Function Attrs: nounwind
define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
entry:
  %cmp9 = icmp sgt i32 %len, 0
  br i1 %cmp9, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
  %0 = load i16, i16* %arrayidx
  %conv8 = zext i16 %0 to i32
  %add = add nuw nsw i32 %conv8, 2
  %conv1 = trunc i32 %add to i16
  %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
  store i16 %conv1, i16* %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_c: i8 input, i16 output: q[i] = trunc_i16(zext_i32(p[i]) + 2).
; The expectations require an <8 x i8> load with the add and store on
; <8 x i16>.
; CHECK-LABEL: @add_c(
; CHECK: load <8 x i8>, <8 x i8>*
; CHECK: add nuw nsw <8 x i16>
; CHECK: store <8 x i16>
; Function Attrs: nounwind
define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
entry:
  %cmp8 = icmp sgt i32 %len, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx
  %conv = zext i8 %0 to i32
  %add = add nuw nsw i32 %conv, 2
  %conv1 = trunc i32 %add to i16
  %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
  store i16 %conv1, i16* %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_d: i16 input sign-extended to i32, i32 output: q[i] = sext_i32(p[i]) + 2.
; There is no truncation of the result here; the expectations require a
; <4 x i16> load with the add and store on <4 x i32>.
; CHECK-LABEL: @add_d(
; CHECK: load <4 x i16>
; CHECK: add nsw <4 x i32>
; CHECK: store <4 x i32>
define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 {
entry:
  %cmp7 = icmp sgt i32 %len, 0
  br i1 %cmp7, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
  %0 = load i16, i16* %arrayidx
  %conv = sext i16 %0 to i32
  %add = add nsw i32 %conv, 2
  %arrayidx2 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_e: i8 input and i8 output with a longer chain of i32 operations
; (shl, add, or, mul, and, xor, mul) mixing the loaded value with the
; zero-extended loop-invariant arguments %arg1 and %arg2.
; The expectations require every one of those operations on <16 x i8>.
; CHECK-LABEL: @add_e(
; CHECK: load <16 x i8>
; CHECK: shl <16 x i8>
; CHECK: add nuw nsw <16 x i8>
; CHECK: or <16 x i8>
; CHECK: mul nuw nsw <16 x i8>
; CHECK: and <16 x i8>
; CHECK: xor <16 x i8>
; CHECK: mul nuw nsw <16 x i8>
; CHECK: store <16 x i8>
define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
entry:
  %cmp.32 = icmp sgt i32 %len, 0
  br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  %conv11 = zext i8 %arg2 to i32
  %conv13 = zext i8 %arg1 to i32
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx
  %conv = zext i8 %0 to i32
  %add = shl i32 %conv, 4
  %conv2 = add nuw nsw i32 %add, 32
  %or = or i32 %conv, 51
  %mul = mul nuw nsw i32 %or, 60
  %and = and i32 %conv2, %conv13
  %mul.masked = and i32 %mul, 252
  %conv17 = xor i32 %mul.masked, %conv11
  %mul18 = mul nuw nsw i32 %conv17, %and
  %conv19 = trunc i32 %mul18 to i8
  %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
  store i8 %conv19, i8* %arrayidx21
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_f: like add_e, but the input is i16 (sign-extended to i32) while the
; output is still i8, and the loaded value is masked with 204 before the `or`.
; The expectations require an <8 x i16> load, a trunc of <8 x i16>, and all of
; the arithmetic plus the store on <8 x i8>.
; CHECK-LABEL: @add_f(
; CHECK: load <8 x i16>
; CHECK: trunc <8 x i16>
; CHECK: shl <8 x i8>
; CHECK: add nsw <8 x i8>
; CHECK: or <8 x i8>
; CHECK: mul nuw nsw <8 x i8>
; CHECK: and <8 x i8>
; CHECK: xor <8 x i8>
; CHECK: mul nuw nsw <8 x i8>
; CHECK: store <8 x i8>
define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
entry:
  %cmp.32 = icmp sgt i32 %len, 0
  br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  %conv11 = zext i8 %arg2 to i32
  %conv13 = zext i8 %arg1 to i32
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
  %0 = load i16, i16* %arrayidx
  %conv = sext i16 %0 to i32
  %add = shl i32 %conv, 4
  %conv2 = add nsw i32 %add, 32
  %or = and i32 %conv, 204
  %conv8 = or i32 %or, 51
  %mul = mul nuw nsw i32 %conv8, 60
  %and = and i32 %conv2, %conv13
  %mul.masked = and i32 %mul, 252
  %conv17 = xor i32 %mul.masked, %conv11
  %mul18 = mul nuw nsw i32 %conv17, %and
  %conv19 = trunc i32 %mul18 to i8
  %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
  store i8 %conv19, i8* %arrayidx21
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_g: i8 input with an unsigned compare/select in i32:
;   x = zext_i32(p[i]) ^ 255;  p[i] = trunc_i8(x <u 24 ? x : 24)
; The result is stored back through %x4, i.e. into %p itself, so %p must not
; be marked `readonly` (writing through a readonly pointer argument is
; undefined behaviour). %q stays readonly: it is only loaded from.
; Note that %2, %x8 and the %r argument are not used by anything in this
; function.
; The expectations require the load, xor, compare, select and store on
; 16-element i8 vectors.
; CHECK-LABEL: @add_g(
; CHECK: load <16 x i8>
; CHECK: xor <16 x i8>
; CHECK: icmp ult <16 x i8>
; CHECK: select <16 x i1> {{.*}}, <16 x i8>
; CHECK: store <16 x i8>
define void @add_g(i8* noalias nocapture %p, i8* noalias nocapture readonly %q, i8* noalias nocapture %r, i8 %arg1, i32 %len) #0 {
  %1 = icmp sgt i32 %len, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = sext i8 %arg1 to i64
  br label %3

._crit_edge:                                      ; preds = %3, %0
  ret void

; <label>:3                                       ; preds = %3, %.lr.ph
  %indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %3 ]
  %x4 = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
  %x5 = load i8, i8* %x4
  %x7 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
  %x8 = load i8, i8* %x7
  %x9 = zext i8 %x5 to i32
  %x10 = xor i32 %x9, 255
  %x11 = icmp ult i32 %x10, 24
  %x12 = select i1 %x11, i32 %x10, i32 24
  %x13 = trunc i32 %x12 to i8
  store i8 %x13, i8* %x4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %._crit_edge, label %3
}

attributes #0 = { nounwind }