; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

declare <4 x float> @ext(<4 x float>)
@g = global <4 x float> zeroinitializer

; Loop carrying a <4 x float> phi. The body mixes a vector load, bitcasts to
; <2 x double>, shufflevectors, an fadd, a call to @ext (which must stay a
; vector call), fcmp, select and a vector store.
define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f1(
; CHECK: entry:
; CHECK: %init.i0 = extractelement <4 x float> %init, i32 0
; CHECK: %init.i1 = extractelement <4 x float> %init, i32 1
; CHECK: %init.i2 = extractelement <4 x float> %init, i32 2
; CHECK: %init.i3 = extractelement <4 x float> %init, i32 3
; CHECK: br label %loop
; CHECK: loop:
; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK: %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK: %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK: %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK: %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK: %nexti = sub i32 %i, 1
; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
; CHECK: %val.i0 = load float, float* %ptr.i0, align 16
; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
; CHECK: %val.i1 = load float, float* %ptr.i1, align 4
; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
; CHECK: %val.i2 = load float, float* %ptr.i2, align 8
; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
; CHECK: %val.i3 = load float, float* %ptr.i3, align 4
; CHECK: %add.i0 = fadd float %val.i0, %val.i2
; CHECK: %add.i1 = fadd float %val.i1, %val.i3
; CHECK: %add.i2 = fadd float %acc.i0, %acc.i2
; CHECK: %add.i3 = fadd float %acc.i1, %acc.i3
; CHECK: %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
; CHECK: %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
; CHECK: %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
; CHECK: %call = call <4 x float> @ext(<4 x float> %add)
; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.0
; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.0
; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.0
; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.0
; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
; CHECK: store float %sel.i0, float* %ptr.i0
; CHECK: store float %sel.i1, float* %ptr.i1
; CHECK: store float %sel.i2, float* %ptr.i2
; CHECK: store float %sel.i3, float* %ptr.i3
; CHECK: %test = icmp eq i32 %nexti, 0
; CHECK: br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK: ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
  %val = load <4 x float> , <4 x float> *%ptr
  %dval = bitcast <4 x float> %val to <2 x double>
  %dacc = bitcast <4 x float> %acc to <2 x double>
  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 0, i32 2>
  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 1, i32 3>
  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  %add = fadd <4 x float> %f1, %f2
  %call = call <4 x float> @ext(<4 x float> %add)
  %cmp = fcmp ogt <4 x float> %call,
                  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Integer variant of the loop above: a <4 x i8> load is sign-extended, added
; to a <4 x i32> accumulator phi, compared, selected against a splat of %i
; (built with insertelement + shufflevector), truncated and stored back.
define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
; CHECK: entry:
; CHECK: %init.i0 = extractelement <4 x i32> %init, i32 0
; CHECK: %init.i1 = extractelement <4 x i32> %init, i32 1
; CHECK: %init.i2 = extractelement <4 x i32> %init, i32 2
; CHECK: %init.i3 = extractelement <4 x i32> %init, i32 3
; CHECK: br label %loop
; CHECK: loop:
; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK: %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK: %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK: %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK: %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK: %nexti = sub i32 %i, 1
; CHECK: %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i
; CHECK: %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
; CHECK: %val.i0 = load i8, i8* %ptr.i0, align 4
; CHECK: %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1
; CHECK: %val.i1 = load i8, i8* %ptr.i1, align 1
; CHECK: %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2
; CHECK: %val.i2 = load i8, i8* %ptr.i2, align 2
; CHECK: %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3
; CHECK: %val.i3 = load i8, i8* %ptr.i3, align 1
; CHECK: %ext.i0 = sext i8 %val.i0 to i32
; CHECK: %ext.i1 = sext i8 %val.i1 to i32
; CHECK: %ext.i2 = sext i8 %val.i2 to i32
; CHECK: %ext.i3 = sext i8 %val.i3 to i32
; CHECK: %add.i0 = add i32 %ext.i0, %acc.i0
; CHECK: %add.i1 = add i32 %ext.i1, %acc.i1
; CHECK: %add.i2 = add i32 %ext.i2, %acc.i2
; CHECK: %add.i3 = add i32 %ext.i3, %acc.i3
; CHECK: %cmp.i0 = icmp slt i32 %add.i0, -10
; CHECK: %cmp.i1 = icmp slt i32 %add.i1, -11
; CHECK: %cmp.i2 = icmp slt i32 %add.i2, -12
; CHECK: %cmp.i3 = icmp slt i32 %add.i3, -13
; CHECK: %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
; CHECK: %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
; CHECK: %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
; CHECK: %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
; CHECK: %trunc.i0 = trunc i32 %sel.i0 to i8
; CHECK: %trunc.i1 = trunc i32 %sel.i1 to i8
; CHECK: %trunc.i2 = trunc i32 %sel.i2 to i8
; CHECK: %trunc.i3 = trunc i32 %sel.i3 to i8
; CHECK: store i8 %trunc.i0, i8* %ptr.i0, align 4
; CHECK: store i8 %trunc.i1, i8* %ptr.i1, align 1
; CHECK: store i8 %trunc.i2, i8* %ptr.i2, align 2
; CHECK: store i8 %trunc.i3, i8* %ptr.i3, align 1
; CHECK: %test = icmp eq i32 %nexti, 0
; CHECK: br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK: ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
  %val = load <4 x i8> , <4 x i8> *%ptr
  %ext = sext <4 x i8> %val to <4 x i32>
  %add = add <4 x i32> %ext, %acc
  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  %single = insertelement <4 x i32> undef, i32 %i, i32 0
  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  %trunc = trunc <4 x i32> %sel to <4 x i8>
  store <4 x i8> %trunc, <4 x i8> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that !tbaa information is preserved.
; The load is tagged with !1 and the store with !2, so the scalar loads and
; the scalar stores are matched against two separate pattern variables.
define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f3(
; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[LOAD_TAG:[0-9]*]]
; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[LOAD_TAG]]
; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[LOAD_TAG]]
; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[LOAD_TAG]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[STORE_TAG:[0-9]*]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[STORE_TAG]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[STORE_TAG]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[STORE_TAG]]
; CHECK: ret void
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
  ret void
}

; Check that !tbaa.struct information is preserved.
define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
; Both the vector load and the vector store carry the same !tbaa.struct
; node (!5), so one pattern variable covers all eight scalar accesses.
; CHECK-LABEL: @f4(
; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: ret void
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
  ret void
}

; Check that llvm.mem.parallel_loop_access information is preserved.
; The load, the store and the loop back-edge (!llvm.loop) all reference !3.
define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f5(
; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: ret void
entry:
  br label %loop

loop:
  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
  %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
  %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.mem.parallel_loop_access !3
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.mem.parallel_loop_access !3
  %next_index = add i32 %index, -1
  %continue = icmp ne i32 %next_index, %count
  br i1 %continue, label %loop, label %end, !llvm.loop !3

end:
  ret void
}

; Check that fpmath information is preserved.
; The constant patterns accept either "1.0" or an exponent spelling such as
; "1.000000e+00" via the trailing {{[e+0]*}} regex.
define <4 x float> @f6(<4 x float> %x) {
; CHECK-LABEL: @f6(
; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
; CHECK: ret <4 x float> %res
  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
    !fpmath !4
  ret <4 x float> %res
}

; Check that random metadata isn't kept.
define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
; The unknown !foo kind is attached to both the load and the store; neither
; may carry it after the pass has run.
; CHECK-LABEL: @f7(
; CHECK-NOT: !foo
; CHECK: ret void
  %val = load <4 x i32> , <4 x i32> *%src, !foo !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !foo !5
  ret void
}

; Test GEP with vectors.
; Lanes 0 and 2 of the index vector are overwritten with the constant 100 and
; lane 1 of the pointer vector with %other, so the expected scalar GEPs use
; those values directly instead of extracting them.
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
                float *%other) {
; CHECK-LABEL: @f8(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
  %val = getelementptr float, <4 x float *> %ptr1, <4 x i32> %i2
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test the handling of unaligned loads.
define void @f9(<4 x float> *%dest, <4 x float> *%src) {
; The vector load is only 4-byte aligned and the vector store only 8-byte
; aligned. Each scalar access is expected to carry the alignment shown in
; the patterns below: 4 for every load, alternating 8/4 for the stores.
;
; A label directive is used for the function header so that these patterns
; cannot match in a neighbouring function; value names such as %dest.i0 and
; %val.i0 are reused throughout this file.
; CHECK-LABEL: @f9(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float, float* %src.i0, align 4
; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
; CHECK: %val.i1 = load float, float* %src.i1, align 4
; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
; CHECK: %val.i2 = load float, float* %src.i2, align 4
; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
; CHECK: %val.i3 = load float, float* %src.i3, align 4
; CHECK: store float %val.i0, float* %dest.i0, align 8
; CHECK: store float %val.i1, float* %dest.i1, align 4
; CHECK: store float %val.i2, float* %dest.i2, align 8
; CHECK: store float %val.i3, float* %dest.i3, align 4
; CHECK: ret void
  %val = load <4 x float> , <4 x float> *%src, align 4
  store <4 x float> %val, <4 x float> *%dest, align 8
  ret void
}

; ...and again with subelement alignment.
define void @f10(<4 x float> *%dest, <4 x float> *%src) {
; Here the vector alignments (1 for the load, 2 for the store) are smaller
; than the 4-byte element size, so every scalar access is expected to keep
; the original vector alignment unchanged.
;
; A label directive is used for the function header so that these patterns
; cannot match in a neighbouring function with identically named values.
; CHECK-LABEL: @f10(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float, float* %src.i0, align 1
; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
; CHECK: %val.i1 = load float, float* %src.i1, align 1
; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
; CHECK: %val.i2 = load float, float* %src.i2, align 1
; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
; CHECK: %val.i3 = load float, float* %src.i3, align 1
; CHECK: store float %val.i0, float* %dest.i0, align 2
; CHECK: store float %val.i1, float* %dest.i1, align 2
; CHECK: store float %val.i2, float* %dest.i2, align 2
; CHECK: store float %val.i3, float* %dest.i3, align 2
; CHECK: ret void
  %val = load <4 x float> , <4 x float> *%src, align 1
  store <4 x float> %val, <4 x float> *%dest, align 2
  ret void
}

; Test that sub-byte loads aren't scalarized.
; The <32 x i1> loads and the store must survive as whole-vector accesses.
define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
; CHECK-LABEL: @f11(
; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0
; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1
; CHECK: store <32 x i1> %and, <32 x i1>* %dest
; CHECK: ret void
  %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
  %val0 = load <32 x i1> , <32 x i1> *%src0
  %val1 = load <32 x i1> , <32 x i1> *%src1
  %and = and <32 x i1> %val0, %val1
  store <32 x i1> %and, <32 x i1> *%dest
  ret void
}

; Test that variable inserts aren't scalarized.
define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
; The insertelement uses a run-time index (%index), so it must stay a vector
; instruction; the shl that consumes it is still split per lane. The
; per-lane patterns are order-independent (DAG form).
;
; A label directive is used for the function header so that these patterns
; cannot match in a neighbouring function.
; CHECK-LABEL: @f12(
; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
; CHECK: ret void
  %val0 = load <4 x i32> , <4 x i32> *%src
  %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
  %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
  store <4 x i32> %val2, <4 x i32> *%dest
  ret void
}

; Test vector GEPs with more than one index.
; The first index is the constant vector <0, 1, 2, 3> and the second is %i;
; each scalar GEP is expected to keep the inbounds flag.
define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
                 float *%other) {
; CHECK-LABEL: @f13(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0
; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1
; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2
; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
                                <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
                                <4 x i32> %i
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test combinations of vector and non-vector PHIs.
; The vector phi is split per lane while the i32 phi is left alone; the
; vector is rebuilt with insertelement because @ext takes it as a whole.
define <4 x float> @f14(<4 x float> %acc, i32 %count) {
; CHECK-LABEL: @f14(
; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
; CHECK: %this_acc.upto0 = insertelement <4 x float> undef, float %this_acc.i0, i32 0
; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
; CHECK: ret <4 x float> %next_acc
entry:
  br label %loop

loop:
  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  %foo = call <4 x float> @ext(<4 x float> %this_acc)
  %next_acc = fadd <4 x float> %this_acc, %foo
  %next_count = sub i32 %this_count, 1
  %cmp = icmp eq i32 %next_count, 0
  br i1 %cmp, label %loop, label %exit

exit:
  ret <4 x float> %next_acc
}

; Metadata nodes referenced by the tests in this file:
;   !0      root that !1 and !2 point at
;   !1, !2  the !tbaa operands in @f3 (load and store respectively)
;   !3      self-referential node used for !llvm.mem.parallel_loop_access
;           and !llvm.loop in @f5
;   !4      the !fpmath operand in @f6
;   !5      used for !tbaa.struct in @f4 and for !foo in @f7
!0 = !{ !"root" }
!1 = !{ !"set1", !0 }
!2 = !{ !"set2", !0 }
!3 = !{ !3 }
!4 = !{ float 4.0 }
!5 = !{ i64 0, i64 8, null }