Home | History | Annotate | Download | only in Scalarizer
      1 ; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
        ; The command line above runs the scalarizer with load/store
        ; scalarization enabled, then dead-code elimination, and pipes the
        ; textual IR into FileCheck, which matches it against the directives
        ; embedded in this file.
        ;
        ; The data layout is little-endian and gives 128-bit vectors 128-bit
        ; alignment (v128:128:128); the "align 16" expected on lane 0 of the
        ; unannotated <4 x float> / <4 x i32> accesses below is consistent with
        ; that.
      2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      3 
        ; External function taking and returning a whole <4 x float>; called
        ; by f1 and f14.
      4 declare <4 x float> @ext(<4 x float>)
        ; Zero-initialized global vector; no function visible in this file
        ; refers to it.
      5 @g = global <4 x float> zeroinitializer
      6 
      7 define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
        ; Loop test: a <4 x float> accumulator phi, a vector load, bitcasts
        ; through <2 x double>, two shuffles, an fadd, an external call and a
        ; compare/select/store are all expected to be split into per-lane
        ; scalar operations (suffixes .i0 .. .i3).
        ;
        ; Expected lane mapping of %add (derived from the two shuffles below):
        ;   lane 0 = val[0] + val[2]      lane 2 = acc[0] + acc[2]
        ;   lane 1 = val[1] + val[3]      lane 3 = acc[1] + acc[3]
        ;
        ; @ext takes and returns a whole vector, so %add is expected to be
        ; rebuilt with an insertelement chain and %call to be split again with
        ; extractelement.
        ;
        ; Neither the vector load nor the vector store has an explicit
        ; alignment; for both, lane 0 is expected at align 16 and lanes 1-3 at
        ; align 4, 8, 4. The store alignments are verified as well as the load
        ; alignments.
      8 ; CHECK-LABEL: @f1(
      9 ; CHECK: entry:
     10 ; CHECK:   %init.i0 = extractelement <4 x float> %init, i32 0
     11 ; CHECK:   %init.i1 = extractelement <4 x float> %init, i32 1
     12 ; CHECK:   %init.i2 = extractelement <4 x float> %init, i32 2
     13 ; CHECK:   %init.i3 = extractelement <4 x float> %init, i32 3
     14 ; CHECK:   br label %loop
     15 ; CHECK: loop:
     16 ; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
     17 ; CHECK:   %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
     18 ; CHECK:   %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
     19 ; CHECK:   %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
     20 ; CHECK:   %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
     21 ; CHECK:   %nexti = sub i32 %i, 1
     22 ; CHECK:   %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
     23 ; CHECK:   %ptr.i0 = bitcast <4 x float>* %ptr to float*
     24 ; CHECK:   %val.i0 = load float, float* %ptr.i0, align 16
     25 ; CHECK:   %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
     26 ; CHECK:   %val.i1 = load float, float* %ptr.i1, align 4
     27 ; CHECK:   %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
     28 ; CHECK:   %val.i2 = load float, float* %ptr.i2, align 8
     29 ; CHECK:   %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
     30 ; CHECK:   %val.i3 = load float, float* %ptr.i3, align 4
     31 ; CHECK:   %add.i0 = fadd float %val.i0, %val.i2
     32 ; CHECK:   %add.i1 = fadd float %val.i1, %val.i3
     33 ; CHECK:   %add.i2 = fadd float %acc.i0, %acc.i2
     34 ; CHECK:   %add.i3 = fadd float %acc.i1, %acc.i3
     35 ; CHECK:   %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
     36 ; CHECK:   %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
     37 ; CHECK:   %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
     38 ; CHECK:   %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
     39 ; CHECK:   %call = call <4 x float> @ext(<4 x float> %add)
     40 ; CHECK:   %call.i0 = extractelement <4 x float> %call, i32 0
     41 ; CHECK:   %cmp.i0 = fcmp ogt float %call.i0, 1.0
     42 ; CHECK:   %call.i1 = extractelement <4 x float> %call, i32 1
     43 ; CHECK:   %cmp.i1 = fcmp ogt float %call.i1, 2.0
     44 ; CHECK:   %call.i2 = extractelement <4 x float> %call, i32 2
     45 ; CHECK:   %cmp.i2 = fcmp ogt float %call.i2, 3.0
     46 ; CHECK:   %call.i3 = extractelement <4 x float> %call, i32 3
     47 ; CHECK:   %cmp.i3 = fcmp ogt float %call.i3, 4.0
     48 ; CHECK:   %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
     49 ; CHECK:   %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
     50 ; CHECK:   %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
     51 ; CHECK:   %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
     52 ; CHECK:   store float %sel.i0, float* %ptr.i0, align 16
     53 ; CHECK:   store float %sel.i1, float* %ptr.i1, align 4
     54 ; CHECK:   store float %sel.i2, float* %ptr.i2, align 8
     55 ; CHECK:   store float %sel.i3, float* %ptr.i3, align 4
     56 ; CHECK:   %test = icmp eq i32 %nexti, 0
     57 ; CHECK:   br i1 %test, label %loop, label %exit
     58 ; CHECK: exit:
     59 ; CHECK:   ret void
     60 entry:
     61   br label %loop
     62 
     63 loop:
     64   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
     65   %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
     66   %nexti = sub i32 %i, 1
     67 
     68   %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
     69   %val = load <4 x float> , <4 x float> *%ptr
          ; Reinterpret both vectors as <2 x double> so that each shuffle
          ; below moves the floats around in pairs.
     70   %dval = bitcast <4 x float> %val to <2 x double>
     71   %dacc = bitcast <4 x float> %acc to <2 x double>
          ; shuffle1 = { val[0], val[1], acc[0], acc[1] } (low halves),
          ; shuffle2 = { val[2], val[3], acc[2], acc[3] } (high halves).
     72   %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
     73                             <2 x i32> <i32 0, i32 2>
     74   %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
     75                             <2 x i32> <i32 1, i32 3>
     76   %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
     77   %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
     78   %add = fadd <4 x float> %f1, %f2
     79   %call = call <4 x float> @ext(<4 x float> %add)
     80   %cmp = fcmp ogt <4 x float> %call,
     81                   <float 1.0, float 2.0, float 3.0, float 4.0>
     82   %sel = select <4 x i1> %cmp, <4 x float> %call,
     83                 <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
     84   store <4 x float> %sel, <4 x float> *%ptr
     85 
     86   %test = icmp eq i32 %nexti, 0
     87   br i1 %test, label %loop, label %exit
     88 
     89 exit:
     90   ret void
     91 }
     92 
     93 define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
        ; Integer variant of the loop test: a <4 x i8> load is sign-extended to
        ; <4 x i32>, added to the accumulator, compared against per-lane
        ; constants, selected against a splat of %i, truncated and stored back.
        ;
        ; The splat (%single and %limit below) is expected to disappear: every
        ; per-lane select uses the scalar %i directly.
        ;
        ; The <4 x i8> load and store carry no explicit alignment; their lanes
        ; are expected at align 4, 1, 2, 1.
     94 ; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
     95 ; CHECK: entry:
     96 ; CHECK:   %init.i0 = extractelement <4 x i32> %init, i32 0
     97 ; CHECK:   %init.i1 = extractelement <4 x i32> %init, i32 1
     98 ; CHECK:   %init.i2 = extractelement <4 x i32> %init, i32 2
     99 ; CHECK:   %init.i3 = extractelement <4 x i32> %init, i32 3
    100 ; CHECK:   br label %loop
    101 ; CHECK: loop:
    102 ; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
    103 ; CHECK:   %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
    104 ; CHECK:   %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
    105 ; CHECK:   %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
    106 ; CHECK:   %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
    107 ; CHECK:   %nexti = sub i32 %i, 1
    108 ; CHECK:   %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i
    109 ; CHECK:   %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
    110 ; CHECK:   %val.i0 = load i8, i8* %ptr.i0, align 4
    111 ; CHECK:   %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1
    112 ; CHECK:   %val.i1 = load i8, i8* %ptr.i1, align 1
    113 ; CHECK:   %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2
    114 ; CHECK:   %val.i2 = load i8, i8* %ptr.i2, align 2
    115 ; CHECK:   %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3
    116 ; CHECK:   %val.i3 = load i8, i8* %ptr.i3, align 1
    117 ; CHECK:   %ext.i0 = sext i8 %val.i0 to i32
    118 ; CHECK:   %ext.i1 = sext i8 %val.i1 to i32
    119 ; CHECK:   %ext.i2 = sext i8 %val.i2 to i32
    120 ; CHECK:   %ext.i3 = sext i8 %val.i3 to i32
    121 ; CHECK:   %add.i0 = add i32 %ext.i0, %acc.i0
    122 ; CHECK:   %add.i1 = add i32 %ext.i1, %acc.i1
    123 ; CHECK:   %add.i2 = add i32 %ext.i2, %acc.i2
    124 ; CHECK:   %add.i3 = add i32 %ext.i3, %acc.i3
    125 ; CHECK:   %cmp.i0 = icmp slt i32 %add.i0, -10
    126 ; CHECK:   %cmp.i1 = icmp slt i32 %add.i1, -11
    127 ; CHECK:   %cmp.i2 = icmp slt i32 %add.i2, -12
    128 ; CHECK:   %cmp.i3 = icmp slt i32 %add.i3, -13
    129 ; CHECK:   %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
    130 ; CHECK:   %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
    131 ; CHECK:   %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
    132 ; CHECK:   %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
    133 ; CHECK:   %trunc.i0 = trunc i32 %sel.i0 to i8
    134 ; CHECK:   %trunc.i1 = trunc i32 %sel.i1 to i8
    135 ; CHECK:   %trunc.i2 = trunc i32 %sel.i2 to i8
    136 ; CHECK:   %trunc.i3 = trunc i32 %sel.i3 to i8
    137 ; CHECK:   store i8 %trunc.i0, i8* %ptr.i0, align 4
    138 ; CHECK:   store i8 %trunc.i1, i8* %ptr.i1, align 1
    139 ; CHECK:   store i8 %trunc.i2, i8* %ptr.i2, align 2
    140 ; CHECK:   store i8 %trunc.i3, i8* %ptr.i3, align 1
    141 ; CHECK:   %test = icmp eq i32 %nexti, 0
    142 ; CHECK:   br i1 %test, label %loop, label %exit
    143 ; CHECK: exit:
    144 ; CHECK:   ret void
    145 entry:
    146   br label %loop
    147 
    148 loop:
    149   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
    150   %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
    151   %nexti = sub i32 %i, 1
    152 
    153   %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
    154   %val = load <4 x i8> , <4 x i8> *%ptr
    155   %ext = sext <4 x i8> %val to <4 x i32>
    156   %add = add <4 x i32> %ext, %acc
    157   %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
          ; Build a splat of %i: put it in lane 0, then broadcast lane 0 with
          ; an all-zero shuffle mask.
    158   %single = insertelement <4 x i32> undef, i32 %i, i32 0
    159   %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
    160                          <4 x i32> zeroinitializer
    161   %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
    162   %trunc = trunc <4 x i32> %sel to <4 x i8>
    163   store <4 x i8> %trunc, <4 x i8> *%ptr
    164 
    165   %test = icmp eq i32 %nexti, 0
    166   br i1 %test, label %loop, label %exit
    167 
    168 exit:
    169   ret void
    170 }
    171 
    172 ; Check that !tbaa information is preserved.
        ; The vector load is tagged !1 and the vector store !2, so the expected
        ; output binds the FileCheck variable TAG on the first scalar load,
        ; reuses it for the other three loads, and then deliberately re-binds
        ; TAG on the first scalar store; the remaining stores must reuse that
        ; second value.
    173 define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
    174 ; CHECK-LABEL: @f3(
    175 ; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
    176 ; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]]
    177 ; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]]
    178 ; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]]
    179 ; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]]
    180 ; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]]
    181 ; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]]
    182 ; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]]
    183 ; CHECK: ret void
    184   %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
    185   %add = add <4 x i32> %val, %val
    186   store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
    187   ret void
    188 }
    189 
    190 ; Check that !tbaa.struct information is preserved.
        ; The vector load and store both use node !5, so a single binding of
        ; the FileCheck variable TAG is expected to cover all eight scalar
        ; accesses.
    191 define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
    192 ; CHECK-LABEL: @f4(
    193 ; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
    194 ; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
    195 ; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
    196 ; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
    197 ; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
    198 ; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
    199 ; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
    200 ; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
    201 ; CHECK: ret void
    202   %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
    203   %add = add <4 x i32> %val, %val
    204   store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
    205   ret void
    206 }
    207 
    208 ; Check that llvm.mem.parallel_loop_access information is preserved.
        ; The vector load and store inside the loop both carry
        ; !llvm.mem.parallel_loop_access !3, and all eight scalar accesses are
        ; expected to carry the same tag. The loop's back-edge branch uses the
        ; same node !3 as its !llvm.loop metadata.
    209 define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
    210 ; CHECK-LABEL: @f5(
    211 ; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG:[0-9]*]]
    212 ; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
    213 ; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
    214 ; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
    215 ; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG]]
    216 ; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
    217 ; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
    218 ; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
    219 ; CHECK: ret void
    220 entry:
    221   br label %loop
    222 
    223 loop:
    224   %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
    225   %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
    226   %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
    227   %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.mem.parallel_loop_access !3
    228   %add = add <4 x i32> %val, %val
    229   store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.mem.parallel_loop_access !3
    230   %next_index = add i32 %index, -1
    231   %continue = icmp ne i32 %next_index, %count
    232   br i1 %continue, label %loop, label %end, !llvm.loop !3
    233 
    234 end:
    235   ret void
    236 }
    237 
    238 ; Check that fpmath information is preserved.
        ; The vector fadd carries !fpmath !4; each of the four scalar fadds is
        ; expected to carry the same tag. The {{[e+0]*}} pattern after each
        ; constant accepts any trailing run of 'e', '+' and '0', so both the
        ; "1.0" and "1.000000e+00" spellings of a constant match. The result is
        ; returned as a vector, so it is expected to be rebuilt with an
        ; insertelement chain.
    239 define <4 x float> @f6(<4 x float> %x) {
    240 ; CHECK-LABEL: @f6(
    241 ; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
    242 ; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
    243 ; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
    244 ; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
    245 ; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
    246 ; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
    247 ; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
    248 ; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
    249 ; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
    250 ; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
    251 ; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
    252 ; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
    253 ; CHECK: ret <4 x float> %res
    254   %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
    255     !fpmath !4
    256   ret <4 x float> %res
    257 }
    258 
    259 ; Check that random metadata isn't kept.
        ; The vector load and store are tagged with the metadata kind !foo.
        ; The expected output must not contain the text "!foo" anywhere
        ; between the function header and its "ret void".
    260 define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
    261 ; CHECK-LABEL: @f7(
    262 ; CHECK-NOT: !foo
    263 ; CHECK: ret void
    264   %val = load <4 x i32> , <4 x i32> *%src, !foo !5
    265   %add = add <4 x i32> %val, %val
    266   store <4 x i32> %add, <4 x i32> *%dst, !foo !5
    267   ret void
    268 }
    269 
    270 ; Test GEP with vectors.
        ; %i2 is %i0 with lanes 0 and 2 replaced by the constant 100, and %ptr1
        ; is %ptr0 with lane 1 replaced by %other. The expected scalar GEPs
        ; therefore are:
        ;   lane 0: %ptr0[0] + 100        lane 1: %other   + %i0[1]
        ;   lane 2: %ptr0[2] + 100        lane 3: %ptr0[3] + %i0[3]
        ; so extracts are expected only for lanes 1 and 3 of %i0 and for lanes
        ; 0, 2 and 3 of %ptr0. The <4 x float*> store has no explicit
        ; alignment; its lanes are expected at align 32, 8, 16, 8.
    271 define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
    272                 float *%other) {
    273 ; CHECK-LABEL: @f8(
    274 ; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
    275 ; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
    276 ; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
    277 ; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
    278 ; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
    279 ; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
    280 ; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
    281 ; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
    282 ; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
    283 ; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
    284 ; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
    285 ; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
    286 ; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
    287 ; CHECK: store float* %val.i0, float** %dest.i0, align 32
    288 ; CHECK: store float* %val.i1, float** %dest.i1, align 8
    289 ; CHECK: store float* %val.i2, float** %dest.i2, align 16
    290 ; CHECK: store float* %val.i3, float** %dest.i3, align 8
    291 ; CHECK: ret void
    292   %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
    293   %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
    294   %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
    295   %val = getelementptr float, <4 x float *> %ptr1, <4 x i32> %i2
    296   store <4 x float *> %val, <4 x float *> *%dest
    297   ret void
    298 }
    299 
    300 ; Test the handling of unaligned loads.
        ; The vector load is only 4-byte aligned, so every scalar load is
        ; expected at align 4. The vector store is 8-byte aligned, so its lanes
        ; are expected to alternate between align 8 (lanes 0 and 2) and align 4
        ; (lanes 1 and 3).
        ; The function header is matched with a label directive, like the
        ; other functions in this file, so that matching is partitioned per
        ; function.
    301 define void @f9(<4 x float> *%dest, <4 x float> *%src) {
    302 ; CHECK-LABEL: @f9(
    303 ; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
    304 ; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
    305 ; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
    306 ; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
    307 ; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
    308 ; CHECK: %val.i0 = load float, float* %src.i0, align 4
    309 ; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
    310 ; CHECK: %val.i1 = load float, float* %src.i1, align 4
    311 ; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
    312 ; CHECK: %val.i2 = load float, float* %src.i2, align 4
    313 ; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
    314 ; CHECK: %val.i3 = load float, float* %src.i3, align 4
    315 ; CHECK: store float %val.i0, float* %dest.i0, align 8
    316 ; CHECK: store float %val.i1, float* %dest.i1, align 4
    317 ; CHECK: store float %val.i2, float* %dest.i2, align 8
    318 ; CHECK: store float %val.i3, float* %dest.i3, align 4
    319 ; CHECK: ret void
    320   %val = load <4 x float> , <4 x float> *%src, align 4
    321   store <4 x float> %val, <4 x float> *%dest, align 8
    322   ret void
    323 }
    324 
    325 ; ...and again with subelement alignment.
        ; Here the vector load (align 1) and the vector store (align 2) are
        ; aligned to less than the 4-byte element size, so every scalar load is
        ; expected at align 1 and every scalar store at align 2.
        ; The function header is matched with a label directive, like the
        ; other functions in this file, so that matching is partitioned per
        ; function.
    326 define void @f10(<4 x float> *%dest, <4 x float> *%src) {
    327 ; CHECK-LABEL: @f10(
    328 ; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
    329 ; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
    330 ; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
    331 ; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
    332 ; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
    333 ; CHECK: %val.i0 = load float, float* %src.i0, align 1
    334 ; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
    335 ; CHECK: %val.i1 = load float, float* %src.i1, align 1
    336 ; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
    337 ; CHECK: %val.i2 = load float, float* %src.i2, align 1
    338 ; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
    339 ; CHECK: %val.i3 = load float, float* %src.i3, align 1
    340 ; CHECK: store float %val.i0, float* %dest.i0, align 2
    341 ; CHECK: store float %val.i1, float* %dest.i1, align 2
    342 ; CHECK: store float %val.i2, float* %dest.i2, align 2
    343 ; CHECK: store float %val.i3, float* %dest.i3, align 2
    344 ; CHECK: ret void
    345   %val = load <4 x float> , <4 x float> *%src, align 1
    346   store <4 x float> %val, <4 x float> *%dest, align 2
    347   ret void
    348 }
    349 
    350 ; Test that sub-byte loads aren't scalarized.
        ; The elements of <32 x i1> are one bit wide. Both loads and the store
        ; are expected to remain whole-vector operations, with the store still
        ; writing the vector value %and.
        ; The function header is matched with a label directive, like the
        ; other functions in this file, so that matching is partitioned per
        ; function.
    351 define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
    352 ; CHECK-LABEL: @f11(
    353 ; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0
    354 ; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1
    355 ; CHECK: store <32 x i1> %and, <32 x i1>* %dest
    356 ; CHECK: ret void
    357   %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
    358   %val0 = load <32 x i1> , <32 x i1> *%src0
    359   %val1 = load <32 x i1> , <32 x i1> *%src1
    360   %and = and <32 x i1> %val0, %val1
    361   store <32 x i1> %and, <32 x i1> *%dest
    362   ret void
    363 }
    364 
    365 ; Test that variable inserts aren't scalarized.
        ; The insertelement uses the run-time index %index, so it is expected
        ; to stay a vector operation. Its result is then expected to be split
        ; with four extractelements that feed the per-lane shl instructions;
        ; the extracts and shifts may appear in any relative order.
        ; The function header is matched with a label directive, like the
        ; other functions in this file, so that matching is partitioned per
        ; function.
    366 define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
    367 ; CHECK-LABEL: @f12(
    368 ; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
    369 ; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
    370 ; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
    371 ; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
    372 ; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
    373 ; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
    374 ; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
    375 ; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
    376 ; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
    377 ; CHECK: ret void
    378   %val0 = load <4 x i32> , <4 x i32> *%src
    379   %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
    380   %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
    381   store <4 x i32> %val2, <4 x i32> *%dest
    382   ret void
    383 }
    384 
    385 ; Test vector GEPs with more than one index.
        ; The GEP has two vector indices: the constant <0, 1, 2, 3> and the
        ; variable %i. Each lane is expected to become a scalar inbounds GEP
        ; whose first index is that lane's constant and whose second index is
        ; the matching extracted element of %i. The <4 x float*> store has no
        ; explicit alignment; its lanes are expected at align 32, 8, 16, 8.
    386 define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
    387                  float *%other) {
    388 ; CHECK-LABEL: @f13(
    389 ; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
    390 ; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
    391 ; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
    392 ; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
    393 ; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
    394 ; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
    395 ; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0
    396 ; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
    397 ; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
    398 ; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1
    399 ; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
    400 ; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
    401 ; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2
    402 ; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
    403 ; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
    404 ; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3
    405 ; CHECK: store float* %val.i0, float** %dest.i0, align 32
    406 ; CHECK: store float* %val.i1, float** %dest.i1, align 8
    407 ; CHECK: store float* %val.i2, float** %dest.i2, align 16
    408 ; CHECK: store float* %val.i3, float** %dest.i3, align 8
    409 ; CHECK: ret void
    410   %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
    411                                 <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
    412                                 <4 x i32> %i
    413   store <4 x float *> %val, <4 x float *> *%dest
    414   ret void
    415 }
    416 
    417 ; Test combinations of vector and non-vector PHIs.
        ; %this_acc is a vector phi and %this_count a scalar phi in the same
        ; block. The four scalar phis for %this_acc are expected first, then
        ; the %this_count phi, and only after all phis the insertelement chain
        ; that rebuilds %this_acc as a vector (it is passed whole to @ext).
        ; The function is expected to still return the vector %next_acc.
    418 define <4 x float> @f14(<4 x float> %acc, i32 %count) {
    419 ; CHECK-LABEL: @f14(
    420 ; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
    421 ; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
    422 ; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
    423 ; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
    424 ; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
    425 ; CHECK: %this_acc.upto0 = insertelement <4 x float> undef, float %this_acc.i0, i32 0
    426 ; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
    427 ; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
    428 ; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
    429 ; CHECK: ret <4 x float> %next_acc
    430 entry:
    431   br label %loop
    432 
    433 loop:
    434   %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
    435   %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
    436   %foo = call <4 x float> @ext(<4 x float> %this_acc)
    437   %next_acc = fadd <4 x float> %this_acc, %foo
    438   %next_count = sub i32 %this_count, 1
    439   %cmp = icmp eq i32 %next_count, 0
    440   br i1 %cmp, label %loop, label %exit
    441 
    442 exit:
    443   ret <4 x float> %next_acc
    444 }
    445 
    446 !0 = !{ !"root" }
        ; !0 is the parent node referenced by !1 and !2.
    447 !1 = !{ !"set1", !0 }
    448 !2 = !{ !"set2", !0 }
        ; !1 and !2 are the two distinct !tbaa tags attached to the load and
        ; the store in f3.
    449 !3 = !{ !3 }
        ; Self-referential node used by f5 both as the
        ; !llvm.mem.parallel_loop_access tag and as the !llvm.loop node.
    450 !4 = !{ float 4.0 }
        ; Operand of the !fpmath attachment on the fadd in f6.
    451 !5 = !{ i64 0, i64 8, null }
        ; Shared by f4 (as !tbaa.struct) and by f7 (as the !foo attachment).
    452