Home | History | Annotate | Download | only in SROA
      1 ; RUN: opt < %s -sroa -S | FileCheck %s
      2 ; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
      3 
      4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
        ; The layout string above applies to every function in this file: the
        ; leading "e" selects little-endian byte order and "p:64:64:64" declares
        ; 64-bit pointers.
      5 
        ; Lifetime marker intrinsics; in this file they are called by @test0.
      6 declare void @llvm.lifetime.start(i64, i8* nocapture)
      7 declare void @llvm.lifetime.end(i64, i8* nocapture)
      8 
      9 define i32 @test0() {
        ; Two independent scalar allocas (an i32 and a float). Each one is
        ; bracketed by lifetime.start / lifetime.end calls made through an i8*
        ; bitcast, written once and read once. The expectations below require
        ; that no alloca survives and that the function still returns an i32.
     10 ; CHECK-LABEL: @test0(
     11 ; CHECK-NOT: alloca
     12 ; CHECK: ret i32
     13 
     14 entry:
     15   %a1 = alloca i32
     16   %a2 = alloca float
     17 
     18   %a1.i8 = bitcast i32* %a1 to i8*
     19   call void @llvm.lifetime.start(i64 4, i8* %a1.i8)
     20 
     21   store i32 0, i32* %a1
     22   %v1 = load i32* %a1
     23 
     24   call void @llvm.lifetime.end(i64 4, i8* %a1.i8)
     25 
     26   %a2.i8 = bitcast float* %a2 to i8*
     27   call void @llvm.lifetime.start(i64 4, i8* %a2.i8)
     28 
     29   store float 0.0, float* %a2
     30   %v2 = load float * %a2
        ; Reinterpret the float bits as an integer so both loaded values can be
        ; combined into the single i32 result.
     31   %v2.int = bitcast float %v2 to i32
     32   %sum1 = add i32 %v1, %v2.int
     33 
     34   call void @llvm.lifetime.end(i64 4, i8* %a2.i8)
     35 
     36   ret i32 %sum1
     37 }
     38 
     39 define i32 @test1() {
        ; A { i32, float } alloca that is only ever accessed through a GEP to
        ; field 0: a store of 0 followed by a load of the same slot. The
        ; expectations require the alloca to disappear and the return value to
        ; be the constant 0.
     40 ; CHECK-LABEL: @test1(
     41 ; CHECK-NOT: alloca
     42 ; CHECK: ret i32 0
     43 
     44 entry:
     45   %X = alloca { i32, float }
     46   %Y = getelementptr { i32, float }* %X, i64 0, i32 0
     47   store i32 0, i32* %Y
     48   %Z = load i32* %Y
     49   ret i32 %Z
     50 }
     51 
     52 define i64 @test2(i64 %X) {
        ; An [8 x i8] alloca that is accessed as a single i64 through a bitcast.
        ; The store happens in the entry block and the load in the successor
        ; block L2. The expectations require the argument %X to be returned
        ; directly, with no alloca left behind.
     53 ; CHECK-LABEL: @test2(
     54 ; CHECK-NOT: alloca
     55 ; CHECK: ret i64 %X
     56 
     57 entry:
     58   %A = alloca [8 x i8]
     59   %B = bitcast [8 x i8]* %A to i64*
     60   store i64 %X, i64* %B
     61   br label %L2
     62 
     63 L2:
     64   %Z = load i64* %B
     65   ret i64 %Z
     66 }
     67 
     68 define void @test3(i8* %dst, i8* %src) {
        ; A 300-byte alloca is filled from %src with one memcpy, partially
        ; overwritten in several ways, and finally copied out to %dst with a
        ; second 300-byte memcpy. The overwrites are:
        ;   - a single i8 store at offset 42,
        ;   - a run of overlapping i8/i16/i32/i64 stores starting at offset 142,
        ;   - two groups of overlapping stores starting at offsets 200 and 208,
        ;   - memcpy/memset calls that straddle the boundaries between those
        ;     regions.
        ; The expectations list seven replacement allocas of 42, 99, 16, 42, 7,
        ; 7 and 85 bytes. Together with the two single bytes at offsets 42 and
        ; 207, which are expected to be handled as plain i8 values rather than
        ; memory, these cover all 300 bytes. Every original memory operation is
        ; expected to be rewritten, in order, against the matching piece.
     69 ; CHECK-LABEL: @test3(
     70 
     71 entry:
     72   %a = alloca [300 x i8]
     73 ; CHECK-NOT:  alloca
     74 ; CHECK:      %[[test3_a1:.*]] = alloca [42 x i8]
     75 ; CHECK-NEXT: %[[test3_a2:.*]] = alloca [99 x i8]
     76 ; CHECK-NEXT: %[[test3_a3:.*]] = alloca [16 x i8]
     77 ; CHECK-NEXT: %[[test3_a4:.*]] = alloca [42 x i8]
     78 ; CHECK-NEXT: %[[test3_a5:.*]] = alloca [7 x i8]
     79 ; CHECK-NEXT: %[[test3_a6:.*]] = alloca [7 x i8]
     80 ; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8]
     81 
     82   %b = getelementptr [300 x i8]* %a, i64 0, i64 0
     83   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i32 1, i1 false)
     84 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
     85 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 42
     86 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
     87 ; CHECK-NEXT: %[[test3_r1:.*]] = load i8* %[[gep]]
     88 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
     89 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
     90 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
     91 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 142
     92 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
     93 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
     94 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 158
     95 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
     96 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
     97 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 200
     98 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
     99 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    100 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 207
    101 ; CHECK-NEXT: %[[test3_r2:.*]] = load i8* %[[gep]]
    102 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 208
    103 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
    104 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    105 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 215
    106 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
    107 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
    108 
    109   ; Clobber a single element of the array, this should be promotable.
    110   %c = getelementptr [300 x i8]* %a, i64 0, i64 42
    111   store i8 0, i8* %c
    112 
    113   ; Make a sequence of overlapping stores to the array. These overlap both in
    114   ; forward strides and in shrinking accesses.
    115   %overlap.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 142
    116   %overlap.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 143
    117   %overlap.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 144
    118   %overlap.4.i8 = getelementptr [300 x i8]* %a, i64 0, i64 145
    119   %overlap.5.i8 = getelementptr [300 x i8]* %a, i64 0, i64 146
    120   %overlap.6.i8 = getelementptr [300 x i8]* %a, i64 0, i64 147
    121   %overlap.7.i8 = getelementptr [300 x i8]* %a, i64 0, i64 148
    122   %overlap.8.i8 = getelementptr [300 x i8]* %a, i64 0, i64 149
    123   %overlap.9.i8 = getelementptr [300 x i8]* %a, i64 0, i64 150
    124   %overlap.1.i16 = bitcast i8* %overlap.1.i8 to i16*
    125   %overlap.1.i32 = bitcast i8* %overlap.1.i8 to i32*
    126   %overlap.1.i64 = bitcast i8* %overlap.1.i8 to i64*
    127   %overlap.2.i64 = bitcast i8* %overlap.2.i8 to i64*
    128   %overlap.3.i64 = bitcast i8* %overlap.3.i8 to i64*
    129   %overlap.4.i64 = bitcast i8* %overlap.4.i8 to i64*
    130   %overlap.5.i64 = bitcast i8* %overlap.5.i8 to i64*
    131   %overlap.6.i64 = bitcast i8* %overlap.6.i8 to i64*
    132   %overlap.7.i64 = bitcast i8* %overlap.7.i8 to i64*
    133   %overlap.8.i64 = bitcast i8* %overlap.8.i8 to i64*
    134   %overlap.9.i64 = bitcast i8* %overlap.9.i8 to i64*
    135   store i8 1, i8* %overlap.1.i8
    136 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
    137 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
    138   store i16 1, i16* %overlap.1.i16
    139 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i16*
    140 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
    141   store i32 1, i32* %overlap.1.i32
    142 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i32*
    143 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
    144   store i64 1, i64* %overlap.1.i64
    145 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i64*
    146 ; CHECK-NEXT: store i64 1, i64* %[[bitcast]]
    147   store i64 2, i64* %overlap.2.i64
    148 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 1
    149 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    150 ; CHECK-NEXT: store i64 2, i64* %[[bitcast]]
    151   store i64 3, i64* %overlap.3.i64
    152 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 2
    153 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    154 ; CHECK-NEXT: store i64 3, i64* %[[bitcast]]
    155   store i64 4, i64* %overlap.4.i64
    156 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 3
    157 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    158 ; CHECK-NEXT: store i64 4, i64* %[[bitcast]]
    159   store i64 5, i64* %overlap.5.i64
    160 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 4
    161 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    162 ; CHECK-NEXT: store i64 5, i64* %[[bitcast]]
    163   store i64 6, i64* %overlap.6.i64
    164 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 5
    165 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    166 ; CHECK-NEXT: store i64 6, i64* %[[bitcast]]
    167   store i64 7, i64* %overlap.7.i64
    168 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 6
    169 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    170 ; CHECK-NEXT: store i64 7, i64* %[[bitcast]]
    171   store i64 8, i64* %overlap.8.i64
    172 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 7
    173 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    174 ; CHECK-NEXT: store i64 8, i64* %[[bitcast]]
    175   store i64 9, i64* %overlap.9.i64
    176 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 8
    177 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    178 ; CHECK-NEXT: store i64 9, i64* %[[bitcast]]
    179 
    180   ; Make two sequences of overlapping stores with more gaps and irregularities.
    181   %overlap2.1.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 200
    182   %overlap2.1.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 201
    183   %overlap2.1.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 202
    184   %overlap2.1.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 203
    185 
    186   %overlap2.2.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 208
    187   %overlap2.2.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 209
    188   %overlap2.2.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 210
    189   %overlap2.2.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 211
    190 
    191   %overlap2.1.0.i16 = bitcast i8* %overlap2.1.0.i8 to i16*
    192   %overlap2.1.0.i32 = bitcast i8* %overlap2.1.0.i8 to i32*
    193   %overlap2.1.1.i32 = bitcast i8* %overlap2.1.1.i8 to i32*
    194   %overlap2.1.2.i32 = bitcast i8* %overlap2.1.2.i8 to i32*
    195   %overlap2.1.3.i32 = bitcast i8* %overlap2.1.3.i8 to i32*
    196   store i8 1,  i8*  %overlap2.1.0.i8
    197 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
    198 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
    199   store i16 1, i16* %overlap2.1.0.i16
    200 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i16*
    201 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
    202   store i32 1, i32* %overlap2.1.0.i32
    203 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i32*
    204 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
    205   store i32 2, i32* %overlap2.1.1.i32
    206 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 1
    207 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    208 ; CHECK-NEXT: store i32 2, i32* %[[bitcast]]
    209   store i32 3, i32* %overlap2.1.2.i32
    210 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
    211 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    212 ; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
    213   store i32 4, i32* %overlap2.1.3.i32
    214 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 3
    215 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    216 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
    217 
    218   %overlap2.2.0.i32 = bitcast i8* %overlap2.2.0.i8 to i32*
    219   %overlap2.2.1.i16 = bitcast i8* %overlap2.2.1.i8 to i16*
    220   %overlap2.2.1.i32 = bitcast i8* %overlap2.2.1.i8 to i32*
    221   %overlap2.2.2.i32 = bitcast i8* %overlap2.2.2.i8 to i32*
    222   %overlap2.2.3.i32 = bitcast i8* %overlap2.2.3.i8 to i32*
    223   store i32 1, i32* %overlap2.2.0.i32
    224 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a6]] to i32*
    225 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
    226   store i8 1,  i8*  %overlap2.2.1.i8
    227 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
    228 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
    229   store i16 1, i16* %overlap2.2.1.i16
    230 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
    231 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    232 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
    233   store i32 1, i32* %overlap2.2.1.i32
    234 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
    235 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    236 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
    237   store i32 3, i32* %overlap2.2.2.i32
    238 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
    239 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    240 ; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
    241   store i32 4, i32* %overlap2.2.3.i32
    242 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 3
    243 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    244 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
    245 
        ; An 8-byte memcpy starting 4 bytes before offset 201 (i.e. at offset
        ; 197). It is expected to be split into 3 bytes at the tail of the
        ; fourth piece and 5 bytes at the head of the fifth.
    246   %overlap2.prefix = getelementptr i8* %overlap2.1.1.i8, i64 -4
    247   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i32 1, i1 false)
    248 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 39
    249 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 3
    250 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 3
    251 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
    252 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 5
    253 
    254   ; Bridge between the overlapping areas
    255   call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i32 1, i1 false)
    256 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
    257 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 5
    258 ; ...promoted i8 store...
    259 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
    260 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 2
    261 
    262   ; Entirely within the second overlap.
    263   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i32 1, i1 false)
    264 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
    265 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
    266 
    267   ; Trailing past the second overlap.
    268   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i32 1, i1 false)
    269 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
    270 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
    271 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 5
    272 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
    273 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 3
    274 
        ; Copy the whole buffer out. The expectations split this into one
        ; memcpy per piece, plus direct i8 stores for offsets 42 and 207: the
        ; constant 0 from the single-element clobber above and the constant 42
        ; from the bridging memset.
    275   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i32 1, i1 false)
    276 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
    277 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 42
    278 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
    279 ; CHECK-NEXT: store i8 0, i8* %[[gep]]
    280 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
    281 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
    282 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
    283 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 142
    284 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
    285 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
    286 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 158
    287 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
    288 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
    289 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 200
    290 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
    291 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    292 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 207
    293 ; CHECK-NEXT: store i8 42, i8* %[[gep]]
    294 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 208
    295 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
    296 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    297 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 215
    298 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
    299 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
    300 
    301   ret void
    302 }
    303 
    304 define void @test4(i8* %dst, i8* %src) {
        ; A 100-byte alloca is filled from %src, then modified by copies whose
        ; source and destination both lie inside the alloca, and finally copied
        ; out to %dst. The modifications are:
        ;   - a 10-byte memcpy from offset 20 to offset 40,
        ;   - a single i8 store of 0 at offset 42,
        ;   - a 10-byte memmove from offset 50 to offset 40.
        ; The expectations list six replacement allocas of 20, 7, 10, 7, 7 and
        ; 40 bytes. The three bytes at the start of each 10-byte region
        ; (offsets 20-22, 40-42 and 50-52) are expected to be carried as an i16
        ; value plus an i8 value instead of memory.
    305 ; CHECK-LABEL: @test4(
    306 
    307 entry:
    308   %a = alloca [100 x i8]
    309 ; CHECK-NOT:  alloca
    310 ; CHECK:      %[[test4_a1:.*]] = alloca [20 x i8]
    311 ; CHECK-NEXT: %[[test4_a2:.*]] = alloca [7 x i8]
    312 ; CHECK-NEXT: %[[test4_a3:.*]] = alloca [10 x i8]
    313 ; CHECK-NEXT: %[[test4_a4:.*]] = alloca [7 x i8]
    314 ; CHECK-NEXT: %[[test4_a5:.*]] = alloca [7 x i8]
    315 ; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8]
    316 
    317   %b = getelementptr [100 x i8]* %a, i64 0, i64 0
    318   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i32 1, i1 false)
    319 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
    320 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 20
    321 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 20
    322 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    323 ; CHECK-NEXT: %[[test4_r1:.*]] = load i16* %[[bitcast]]
    324 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 22
    325 ; CHECK-NEXT: %[[test4_r2:.*]] = load i8* %[[gep]]
    326 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 23
    327 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
    328 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    329 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 30
    330 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
    331 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
    332 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 40
    333 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    334 ; CHECK-NEXT: %[[test4_r3:.*]] = load i16* %[[bitcast]]
    335 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
    336 ; CHECK-NEXT: %[[test4_r4:.*]] = load i8* %[[gep]]
    337 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
    338 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
    339 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    340 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 50
    341 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    342 ; CHECK-NEXT: %[[test4_r5:.*]] = load i16* %[[bitcast]]
    343 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 52
    344 ; CHECK-NEXT: %[[test4_r6:.*]] = load i8* %[[gep]]
    345 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 53
    346 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
    347 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    348 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 60
    349 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
    350 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
    351 
        ; Copy within the alloca: offsets 20..29 -> offsets 40..49. Only the
        ; 7-byte in-memory tail is expected to remain as a memcpy.
    352   %a.src.1 = getelementptr [100 x i8]* %a, i64 0, i64 20
    353   %a.dst.1 = getelementptr [100 x i8]* %a, i64 0, i64 40
    354   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i32 1, i1 false)
    355 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
    356 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
    357 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    358 
    359   ; Clobber a single element of the array, this should be promotable, and be deleted.
    360   %c = getelementptr [100 x i8]* %a, i64 0, i64 42
    361   store i8 0, i8* %c
    362 
        ; Move within the alloca: offsets 50..59 -> offsets 40..49. This
        ; overwrites both the earlier copy and the single-byte store above; the
        ; expectation is a plain memcpy between the two 7-byte pieces.
    363   %a.src.2 = getelementptr [100 x i8]* %a, i64 0, i64 50
    364   call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i32 1, i1 false)
    365 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
    366 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
    367 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    368 
        ; Copy the whole buffer out. Note that the values expected at %dst
        ; offsets 40 and 42 are the ones loaded from %src offsets 50 and 52
        ; (test4_r5 / test4_r6), because the memmove was the last write to that
        ; region.
    369   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i32 1, i1 false)
    370 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
    371 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 20
    372 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 20
    373 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    374 ; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]]
    375 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 22
    376 ; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]]
    377 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 23
    378 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
    379 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    380 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 30
    381 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
    382 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
    383 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 40
    384 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    385 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
    386 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
    387 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
    388 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
    389 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
    390 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    391 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 50
    392 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    393 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
    394 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 52
    395 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
    396 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 53
    397 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
    398 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    399 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 60
    400 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
    401 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
    402 
    403   ret void
    404 }
    405 
    406 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
        ; Memory intrinsics used by the tests in this file. At every call site
        ; here the i32 after the pointer (and value) operands is the byte count,
        ; the next i32 is always 1, and the final i1 is true only in the tests
        ; whose expectations mention volatile accesses (@test6, @test7).
    407 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
    408 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
    409 
    410 define i16 @test5() {
        ; A [4 x i8] alloca written as one float (0.0) and then read back as an
        ; i16 at byte offset 2. The expectations require the load to be replaced
        ; by integer arithmetic on the stored value: bitcast to i32, logical
        ; shift right by 16, truncate to i16. No `alloca float` may appear.
    411 ; CHECK-LABEL: @test5(
    412 ; CHECK-NOT: alloca float
    413 ; CHECK:      %[[cast:.*]] = bitcast float 0.0{{.*}} to i32
    414 ; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16
    415 ; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16
    416 ; CHECK-NEXT: ret i16 %[[trunc]]
    417 
    418 entry:
    419   %a = alloca [4 x i8]
    420   %fptr = bitcast [4 x i8]* %a to float*
    421   store float 0.0, float* %fptr
    422   %ptr = getelementptr [4 x i8]* %a, i32 0, i32 2
    423   %iptr = bitcast i8* %ptr to i16*
    424   %val = load i16* %iptr
    425   ret i16 %val
    426 }
    427 
    428 define i32 @test6() {
        ; A [4 x i8] alloca filled by a memset whose final i1 operand is true
        ; (volatile), then read back as one i32. The expectations require the
        ; memory to stay, retyped as `alloca i32`, with the memset turned into a
        ; volatile i32 store followed by an ordinary i32 load.
    429 ; CHECK-LABEL: @test6(
    430 ; CHECK: alloca i32
    431 ; CHECK-NEXT: store volatile i32
    432 ; CHECK-NEXT: load i32*
    433 ; CHECK-NEXT: ret i32
    434 
    435 entry:
    436   %a = alloca [4 x i8]
    437   %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
    438   call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i32 1, i1 true)
    439   %iptr = bitcast i8* %ptr to i32*
    440   %val = load i32* %iptr
    441   ret i32 %val
    442 }
    443 
    444 define void @test7(i8* %src, i8* %dst) {
        ; Four bytes are copied %src -> alloca -> %dst using two memcpy calls
        ; whose final i1 operand is true (volatile). The expectations require
        ; an `alloca i32` to remain and each memcpy to become a volatile i32
        ; load paired with a volatile i32 store.
    445 ; CHECK-LABEL: @test7(
    446 ; CHECK: alloca i32
    447 ; CHECK-NEXT: bitcast i8* %src to i32*
    448 ; CHECK-NEXT: load volatile i32*
    449 ; CHECK-NEXT: store volatile i32
    450 ; CHECK-NEXT: bitcast i8* %dst to i32*
    451 ; CHECK-NEXT: load volatile i32*
    452 ; CHECK-NEXT: store volatile i32
    453 ; CHECK-NEXT: ret
    454 
    455 entry:
    456   %a = alloca [4 x i8]
    457   %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
    458   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
    459   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
    460   ret void
    461 }
    462 
    463 
    464 %S1 = type { i32, i32, [16 x i8] }
        ; %S2 holds a pointer to an %S1 (field 0) and a pointer to another %S2
        ; (field 1). @test8 and @test10 below use these types.
    465 %S2 = type { %S1*, %S2* }
    466 
    467 define %S2 @test8(%S2* %s2) {
        ; Builds a local %S2 in an alloca from the two fields of the node that
        ; %s2's field 1 points to, then reads both fields back and returns them
        ; as a first-class %S2 aggregate. The expectations require the alloca to
        ; vanish and the insertvalue instructions to use the loaded pointers
        ; directly.
    468 ; CHECK-LABEL: @test8(
    469 entry:
    470   %new = alloca %S2
    471 ; CHECK-NOT: alloca
    472 
    473   %s2.next.ptr = getelementptr %S2* %s2, i64 0, i32 1
    474   %s2.next = load %S2** %s2.next.ptr
    475 ; CHECK:      %[[gep:.*]] = getelementptr %S2* %s2, i64 0, i32 1
    476 ; CHECK-NEXT: %[[next:.*]] = load %S2** %[[gep]]
    477 
        ; Copy both pointer fields of the loaded node into the local %new.
    478   %s2.next.s1.ptr = getelementptr %S2* %s2.next, i64 0, i32 0
    479   %s2.next.s1 = load %S1** %s2.next.s1.ptr
    480   %new.s1.ptr = getelementptr %S2* %new, i64 0, i32 0
    481   store %S1* %s2.next.s1, %S1** %new.s1.ptr
    482   %s2.next.next.ptr = getelementptr %S2* %s2.next, i64 0, i32 1
    483   %s2.next.next = load %S2** %s2.next.next.ptr
    484   %new.next.ptr = getelementptr %S2* %new, i64 0, i32 1
    485   store %S2* %s2.next.next, %S2** %new.next.ptr
    486 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 0
    487 ; CHECK-NEXT: %[[next_s1:.*]] = load %S1** %[[gep]]
    488 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 1
    489 ; CHECK-NEXT: %[[next_next:.*]] = load %S2** %[[gep]]
    490 
        ; Read the local back field by field and assemble the return value.
    491   %new.s1 = load %S1** %new.s1.ptr
    492   %result1 = insertvalue %S2 undef, %S1* %new.s1, 0
    493 ; CHECK-NEXT: %[[result1:.*]] = insertvalue %S2 undef, %S1* %[[next_s1]], 0
    494   %new.next = load %S2** %new.next.ptr
    495   %result2 = insertvalue %S2 %result1, %S2* %new.next, 1
    496 ; CHECK-NEXT: %[[result2:.*]] = insertvalue %S2 %[[result1]], %S2* %[[next_next]], 1
    497   ret %S2 %result2
    498 ; CHECK-NEXT: ret %S2 %[[result2]]
    499 }
    500 
    501 define i64 @test9() {
    502 ; Ensure we can handle loads off the end of an alloca even when wrapped in
    503 ; weird bit casts and types. This is valid IR due to the alignment and masking
    504 ; off the bits past the end of the alloca.
    505 ;
        ; Concretely: the alloca is a 3-byte { [3 x i8] } with align 8. Bytes 0,
        ; 1 and 2 are stored individually (0, 0, 26) and then a full i64 is
        ; loaded through a { i64 }* bitcast and masked with 16777215 (the low 24
        ; bits). The expectations require the load to be rebuilt as a
        ; zext/shl/and/or chain on an i64 that starts from undef, with no
        ; alloca left.
    506 ; CHECK-LABEL: @test9(
    507 ; CHECK-NOT: alloca
    508 ; CHECK:      %[[b2:.*]] = zext i8 26 to i64
    509 ; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16
    510 ; CHECK-NEXT: %[[m2:.*]] = and i64 undef, -16711681
    511 ; CHECK-NEXT: %[[i2:.*]] = or i64 %[[m2]], %[[s2]]
    512 ; CHECK-NEXT: %[[b1:.*]] = zext i8 0 to i64
    513 ; CHECK-NEXT: %[[s1:.*]] = shl i64 %[[b1]], 8
    514 ; CHECK-NEXT: %[[m1:.*]] = and i64 %[[i2]], -65281
    515 ; CHECK-NEXT: %[[i1:.*]] = or i64 %[[m1]], %[[s1]]
    516 ; CHECK-NEXT: %[[b0:.*]] = zext i8 0 to i64
    517 ; CHECK-NEXT: %[[m0:.*]] = and i64 %[[i1]], -256
    518 ; CHECK-NEXT: %[[i0:.*]] = or i64 %[[m0]], %[[b0]]
    519 ; CHECK-NEXT: %[[result:.*]] = and i64 %[[i0]], 16777215
    520 ; CHECK-NEXT: ret i64 %[[result]]
    521 
    522 entry:
    523   %a = alloca { [3 x i8] }, align 8
    524   %gep1 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 0
    525   store i8 0, i8* %gep1, align 1
    526   %gep2 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 1
    527   store i8 0, i8* %gep2, align 1
    528   %gep3 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 2
    529   store i8 26, i8* %gep3, align 1
    530   %cast = bitcast { [3 x i8] }* %a to { i64 }*
    531   %elt = getelementptr inbounds { i64 }* %cast, i32 0, i32 0
    532   %load = load i64* %elt
    533   %result = and i64 %load, 16777215
    534   ret i64 %result
    535 }
    536 
    537 define %S2* @test10() {
        ; An [8 x i8] alloca is zero-filled with a non-volatile memset and then
        ; read back as a single %S2* pointer. The expectations require the
        ; result to fold to a null pointer, with no `alloca %S2*` in the output.
    538 ; CHECK-LABEL: @test10(
    539 ; CHECK-NOT: alloca %S2*
    540 ; CHECK: ret %S2* null
    541 
    542 entry:
    543   %a = alloca [8 x i8]
    544   %ptr = getelementptr [8 x i8]* %a, i32 0, i32 0
    545   call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i32 1, i1 false)
    546   %s2ptrptr = bitcast i8* %ptr to %S2**
    547   %s2ptr = load %S2** %s2ptrptr
    548   ret %S2* %s2ptr
    549 }
    550 
    551 define i32 @test11() {
        ; A single i32 alloca reached from two blocks selected by a branch on
        ; undef. The "good" block stores and loads at element index 0. The "bad"
        ; block stores and loads at element index 1, which is one i32 past the
        ; allocated object. The expectations require the alloca to be removed
        ; and a `ret i32 0` to be present.
    552 ; CHECK-LABEL: @test11(
    553 ; CHECK-NOT: alloca
    554 ; CHECK: ret i32 0
    555 
    556 entry:
    557   %X = alloca i32
    558   br i1 undef, label %good, label %bad
    559 
    560 good:
    561   %Y = getelementptr i32* %X, i64 0
    562   store i32 0, i32* %Y
    563   %Z = load i32* %Y
    564   ret i32 %Z
    565 
    566 bad:
    567   %Y2 = getelementptr i32* %X, i64 1
    568   store i32 0, i32* %Y2
    569   %Z2 = load i32* %Y2
    570   ret i32 %Z2
    571 }
    572 
    573 define i8 @test12() {
    574 ; We fully promote these to the i24 load or store size, resulting in just masks
    575 ; and other operations that instcombine will fold, but no alloca.
    576 ;
    577 ; CHECK-LABEL: @test12(
    578 
    579 entry:
    580   %a = alloca [3 x i8]
    581   %b = alloca [3 x i8]
    582 ; CHECK-NOT: alloca
    583 
          ; Write %a one byte at a time, then read it back as a single i24.
    584   %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
    585   store i8 0, i8* %a0ptr
    586   %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
    587   store i8 0, i8* %a1ptr
    588   %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
    589   store i8 0, i8* %a2ptr
    590   %aiptr = bitcast [3 x i8]* %a to i24*
    591   %ai = load i24* %aiptr
    592 ; CHECK-NOT: store
    593 ; CHECK-NOT: load
    594 ; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
    595 ; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16
    596 ; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535
    597 ; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[shift2]]
    598 ; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
    599 ; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
    600 ; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
    601 ; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
    602 ; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
    603 ; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], -256
    604 ; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[ext0]]
    605 
          ; Write %b as a single i24, then read it back one byte at a time.
    606   %biptr = bitcast [3 x i8]* %b to i24*
    607   store i24 %ai, i24* %biptr
    608   %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
    609   %b0 = load i8* %b0ptr
    610   %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
    611   %b1 = load i8* %b1ptr
    612   %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
    613   %b2 = load i8* %b2ptr
    614 ; CHECK-NOT: store
    615 ; CHECK-NOT: load
    616 ; CHECK:      %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
    617 ; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
    618 ; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
    619 ; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[insert0]], 16
    620 ; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
    621 
    622   %bsum0 = add i8 %b0, %b1
    623   %bsum1 = add i8 %bsum0, %b2
    624   ret i8 %bsum1
    625 ; CHECK:      %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
    626 ; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
    627 ; CHECK-NEXT: ret i8 %[[sum1]]
    628 }
    629 
    630 define i32 @test13() {
    631 ; Ensure we don't crash and handle undefined loads that straddle the end of the
    632 ; allocation.
        ; The i16 load below is taken at element 1 of an i16 view of the 3-byte alloca,
        ; so it covers bytes 2 and 3, i.e. one byte past the end of the allocation.
    633 ; CHECK-LABEL: @test13(
    634 ; CHECK:      %[[value:.*]] = zext i8 0 to i16
    635 ; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32
    636 ; CHECK-NEXT: ret i32 %[[ret]]
    637 
    638 entry:
    639   %a = alloca [3 x i8], align 2
    640   %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
    641   store i8 0, i8* %b0ptr
    642   %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
    643   store i8 0, i8* %b1ptr
    644   %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
    645   store i8 0, i8* %b2ptr
    646   %iptrcast = bitcast [3 x i8]* %a to i16*
    647   %iptrgep = getelementptr i16* %iptrcast, i64 1
    648   %i = load i16* %iptrgep
    649   %ret = zext i16 %i to i32
    650   ret i32 %ret
    651 }
    652 
    653 %test14.struct = type { [3 x i32] } ; Wrapper around three i32 values; used by @test14.
    654 
    655 define void @test14(...) nounwind uwtable {
    656 ; This is a strange case where we split allocas into promotable partitions, but
    657 ; also gain enough data to prove they must be dead allocas due to GEPs that walk
    658 ; across two adjacent allocas. Test that we don't try to promote or otherwise
    659 ; do bad things to these dead allocas, they should just be removed.
    660 ; CHECK-LABEL: @test14(
    661 ; CHECK-NEXT: entry:
    662 ; CHECK-NEXT: ret void
    663 
    664 entry:
    665   %a = alloca %test14.struct
    666   %p = alloca %test14.struct*
    667   %0 = bitcast %test14.struct* %a to i8*
          ; %1 is %a advanced by 12 bytes, which is the combined size of its three i32
          ; members, so %2 points just past the end of %a.
    668   %1 = getelementptr i8* %0, i64 12
    669   %2 = bitcast i8* %1 to %test14.struct*
    670   %3 = getelementptr inbounds %test14.struct* %2, i32 0, i32 0
    671   %4 = getelementptr inbounds %test14.struct* %a, i32 0, i32 0
    672   %5 = bitcast [3 x i32]* %3 to i32*
    673   %6 = bitcast [3 x i32]* %4 to i32*
          ; Copy the three i32 elements of %a, one at a time, to the region behind it.
    674   %7 = load i32* %6, align 4
    675   store i32 %7, i32* %5, align 4
    676   %8 = getelementptr inbounds i32* %5, i32 1
    677   %9 = getelementptr inbounds i32* %6, i32 1
    678   %10 = load i32* %9, align 4
    679   store i32 %10, i32* %8, align 4
    680   %11 = getelementptr inbounds i32* %5, i32 2
    681   %12 = getelementptr inbounds i32* %6, i32 2
    682   %13 = load i32* %12, align 4
    683   store i32 %13, i32* %11, align 4
    684   ret void
    685 }
    686 
    687 define i32 @test15(i1 %flag) nounwind uwtable {
    688 ; Ensure that when there are dead instructions using an alloca that are not
    689 ; loads or stores we still delete them during partitioning and rewriting.
    690 ; Otherwise we'll go to promote them while they still have unpromotable uses.
    691 ; CHECK-LABEL: @test15(
    692 ; CHECK-NEXT: entry:
    693 ; CHECK-NEXT:   br label %loop
    694 ; CHECK:      loop:
    695 ; CHECK-NEXT:   br label %loop
    696 
    697 entry:
    698   %l0 = alloca i64
    699   %l1 = alloca i64
    700   %l2 = alloca i64
    701   %l3 = alloca i64
    702   br label %loop
    703 
    704 loop:
          ; Each of the four allocas gets one store plus a pointer computation whose
          ; result (%dead0 .. %dead3) is never used.
    705   %dead3 = phi i8* [ %gep3, %loop ], [ null, %entry ]
    706 
    707   store i64 1879048192, i64* %l0, align 8
    708   %bc0 = bitcast i64* %l0 to i8*
    709   %gep0 = getelementptr i8* %bc0, i64 3
    710   %dead0 = bitcast i8* %gep0 to i64*
    711 
    712   store i64 1879048192, i64* %l1, align 8
    713   %bc1 = bitcast i64* %l1 to i8*
    714   %gep1 = getelementptr i8* %bc1, i64 3
    715   %dead1 = getelementptr i8* %gep1, i64 1
    716 
    717   store i64 1879048192, i64* %l2, align 8
    718   %bc2 = bitcast i64* %l2 to i8*
    719   %gep2.1 = getelementptr i8* %bc2, i64 1
    720   %gep2.2 = getelementptr i8* %bc2, i64 3
    721   ; Note that this select should get visited multiple times due to using two
    722   ; different GEPs off the same alloca. We should only delete it once.
    723   %dead2 = select i1 %flag, i8* %gep2.1, i8* %gep2.2
    724 
    725   store i64 1879048192, i64* %l3, align 8
    726   %bc3 = bitcast i64* %l3 to i8*
    727   %gep3 = getelementptr i8* %bc3, i64 3
    728 
    729   br label %loop
    730 }
    731 
    732 define void @test16(i8* %src, i8* %dst) {
    733 ; Ensure that we can promote an alloca of [3 x i8] to an i24 SSA value.
        ; Both memcpys below request 4 bytes although the alloca is only 3 bytes wide;
        ; the expected output is an i24 load from %src and an i24 store of 0 to %dst.
    734 ; CHECK-LABEL: @test16(
    735 ; CHECK-NOT: alloca
    736 ; CHECK:      %[[srccast:.*]] = bitcast i8* %src to i24*
    737 ; CHECK-NEXT: load i24* %[[srccast]]
    738 ; CHECK-NEXT: %[[dstcast:.*]] = bitcast i8* %dst to i24*
    739 ; CHECK-NEXT: store i24 0, i24* %[[dstcast]]
    740 ; CHECK-NEXT: ret void
    741 
    742 entry:
    743   %a = alloca [3 x i8]
    744   %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
    745   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 false)
    746   %cast = bitcast i8* %ptr to i24*
    747   store i24 0, i24* %cast
    748   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 false)
    749   ret void
    750 }
    751 
    752 define void @test17(i8* %src, i8* %dst) {
    753 ; Ensure that we can rewrite unpromotable memcpys which extend past the end of
    754 ; the alloca.
    755 ; CHECK-LABEL: @test17(
    756 ; CHECK:      %[[a:.*]] = alloca [3 x i8]
    757 ; CHECK-NEXT: %[[ptr:.*]] = getelementptr [3 x i8]* %[[a]], i32 0, i32 0
    758 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[ptr]], i8* %src,
    759 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[ptr]],
    760 ; CHECK-NEXT: ret void
    761 
    762 entry:
    763   %a = alloca [3 x i8]
    764   %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
          ; Both memcpys are volatile (trailing `i1 true`) and request 4 bytes from/to
          ; the 3-byte alloca.
    765   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
    766   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
    767   ret void
    768 }
    769 
    770 define void @test18(i8* %src, i8* %dst, i32 %size) {
    771 ; Preserve transfer intrinsics with a variable size, even if they overlap with
    772 ; fixed size operations. Further, continue to split and promote allocas preceding
    773 ; the variable sized intrinsic.
        ; The input alloca is 42 bytes wide. The expected output keeps a [34 x i8]
        ; alloca for the bytes from offset 8 onward (the region addressed through
        ; %ptr2 by the %size-length intrinsics); the leading 8 bytes are only touched
        ; by fixed-size operations.
    774 ; CHECK-LABEL: @test18(
    775 ; CHECK:      %[[a:.*]] = alloca [34 x i8]
    776 ; CHECK:      %[[srcgep1:.*]] = getelementptr inbounds i8* %src, i64 4
    777 ; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32*
    778 ; CHECK-NEXT: %[[srcload:.*]] = load i32* %[[srccast1]]
    779 ; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
    780 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[agep1]], i8* %src, i32 %size,
    781 ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
    782 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[agep2]], i8 42, i32 %size,
    783 ; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32*
    784 ; CHECK-NEXT: store i32 42, i32* %[[dstcast1]]
    785 ; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8* %dst, i64 4
    786 ; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32*
    787 ; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]]
    788 ; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
    789 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[agep3]], i32 %size,
    790 ; CHECK-NEXT: ret void
    791 
    792 entry:
    793   %a = alloca [42 x i8]
    794   %ptr = getelementptr [42 x i8]* %a, i32 0, i32 0
    795   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 false)
    796   %ptr2 = getelementptr [42 x i8]* %a, i32 0, i32 8
    797   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i32 1, i1 false)
    798   call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i32 1, i1 false)
    799   %cast = bitcast i8* %ptr to i32*
    800   store i32 42, i32* %cast
    801   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 false)
    802   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i32 1, i1 false)
    803   ret void
    804 }
    805 
    806 %opaque = type opaque ; Opaque type; @test19 takes a pointer to it.
    807 
    808 define i32 @test19(%opaque* %x) {
    809 ; This input will cause us to try to compute a natural GEP when rewriting
    810 ; pointers in such a way that we try to GEP through the opaque type. Previously,
    811 ; a check for an unsized type was missing and this crashed. Ensure it behaves
    812 ; reasonably now.
    813 ; CHECK-LABEL: @test19(
    814 ; CHECK-NOT: alloca
    815 ; CHECK: ret i32 undef
    816 
    817 entry:
    818   %a = alloca { i64, i8* }
    819   %cast1 = bitcast %opaque* %x to i8*
    820   %cast2 = bitcast { i64, i8* }* %a to i8*
          ; Fill all 16 bytes of %a from the memory behind the opaque pointer.
    821   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i32 1, i1 false)
    822   %gep = getelementptr inbounds { i64, i8* }* %a, i32 0, i32 0
          ; %val is loaded but never used; the function returns undef.
    823   %val = load i64* %gep
    824   ret i32 undef
    825 }
    826 
    827 define i32 @test20() {
    828 ; Ensure we can track negative offsets (before the beginning of the alloca) and
    829 ; negative relative offsets from offsets starting past the end of the alloca.
    830 ; CHECK-LABEL: @test20(
    831 ; CHECK-NOT: alloca
    832 ; CHECK: %[[sum1:.*]] = add i32 1, 2
    833 ; CHECK: %[[sum2:.*]] = add i32 %[[sum1]], 3
    834 ; CHECK: ret i32 %[[sum2]]
    835 
    836 entry:
    837   %a = alloca [3 x i32]
    838   %gep1 = getelementptr [3 x i32]* %a, i32 0, i32 0
    839   store i32 1, i32* %gep1
          ; Index -2 followed by +3 lands on element 1.
    840   %gep2.1 = getelementptr [3 x i32]* %a, i32 0, i32 -2
    841   %gep2.2 = getelementptr i32* %gep2.1, i32 3
    842   store i32 2, i32* %gep2.2
          ; Index 14 followed by -12 lands on element 2.
    843   %gep3.1 = getelementptr [3 x i32]* %a, i32 0, i32 14
    844   %gep3.2 = getelementptr i32* %gep3.1, i32 -12
    845   store i32 3, i32* %gep3.2
    846 
    847   %load1 = load i32* %gep1
    848   %load2 = load i32* %gep2.2
    849   %load3 = load i32* %gep3.2
    850   %sum1 = add i32 %load1, %load2
    851   %sum2 = add i32 %sum1, %load3
    852   ret i32 %sum2
    853 }
    854 
    855 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
    856 
    857 define i8 @test21() {
    858 ; Test allocations and offsets which border on overflow of the int64_t used
    859 ; internally. This is awkward to test thoroughly, as LLVM doesn't support such
    860 ; extreme constructs cleanly.
    861 ; CHECK-LABEL: @test21(
    862 ; CHECK-NOT: alloca
    863 ; CHECK: or i8 -1, -1
    864 
    865 entry:
    866   %a = alloca [2305843009213693951 x i8]
    867   %gep0 = getelementptr [2305843009213693951 x i8]* %a, i64 0, i64 2305843009213693949
    868   store i8 255, i8* %gep0
    869   %gep1 = getelementptr [2305843009213693951 x i8]* %a, i64 0, i64 -9223372036854775807
    870   %gep2 = getelementptr i8* %gep1, i64 -1
    871   call void @llvm.memset.p0i8.i64(i8* %gep2, i8 0, i64 18446744073709551615, i32 1, i1 false)
    872   %gep3 = getelementptr i8* %gep1, i64 9223372036854775807
    873   %gep4 = getelementptr i8* %gep3, i64 9223372036854775807
    874   %gep5 = getelementptr i8* %gep4, i64 -6917529027641081857
    875   store i8 255, i8* %gep5
    876   %cast1 = bitcast i8* %gep4 to i32*
    877   store i32 0, i32* %cast1
          ; The result ORs the byte at %gep0 with the byte immediately after it.
    878   %load = load i8* %gep0
    879   %gep6 = getelementptr i8* %gep0, i32 1
    880   %load2 = load i8* %gep6
    881   %result = or i8 %load, %load2
    882   ret i8 %result
    883 }
    884 
    885 %PR13916.struct = type { i8 } ; Single-byte struct allocated by @PR13916.2.
    886 
    887 define void @PR13916.1() {
    888 ; Ensure that we handle overlapping memcpy intrinsics correctly, especially in
    889 ; the case where there is a directly identical value for both source and dest.
        ; The function label is matched with a label directive, like the other tests
        ; in this file, so the directives below only apply to this function's output.
    890 ; CHECK-LABEL: @PR13916.1(
    891 ; CHECK-NOT: alloca
    892 ; CHECK: ret void
    893 
    894 entry:
    895   %a = alloca i8
          ; Source and destination of the copy are the very same pointer.
    896   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false)
    897   %tmp2 = load i8* %a
    898   ret void
    899 }
    900 
    901 define void @PR13916.2() {
    902 ; Check whether we continue to handle them correctly when they start off with
    903 ; different pointer value chains, but during rewriting we coalesce them into the
    904 ; same value.
        ; The function label is matched with a label directive, like the other tests
        ; in this file, so the directives below only apply to this function's output.
    905 ; CHECK-LABEL: @PR13916.2(
    906 ; CHECK-NOT: alloca
    907 ; CHECK: ret void
    908 
    909 entry:
    910   %a = alloca %PR13916.struct, align 1
    911   br i1 undef, label %if.then, label %if.end
    912 
    913 if.then:
          ; %tmp0 and %tmp1 are two separate bitcasts of the same alloca.
    914   %tmp0 = bitcast %PR13916.struct* %a to i8*
    915   %tmp1 = bitcast %PR13916.struct* %a to i8*
    916   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i32 1, i1 false)
    917   br label %if.end
    918 
    919 if.end:
    920   %gep = getelementptr %PR13916.struct* %a, i32 0, i32 0
    921   %tmp2 = load i8* %gep
    922   ret void
    923 }
    924 
    925 define void @PR13990() {
    926 ; Ensure we can handle cases where processing one alloca causes the other
    927 ; alloca to become dead and get deleted. This might crash or fail under
    928 ; Valgrind if we regress.
    929 ; CHECK-LABEL: @PR13990(
    930 ; CHECK-NOT: alloca
    931 ; CHECK: unreachable
    932 ; CHECK: unreachable
    933 
    934 entry:
    935   %tmp1 = alloca i8*
    936   %tmp2 = alloca i8*
    937   br i1 undef, label %bb1, label %bb2
    938 
    939 bb1:
    940   store i8* undef, i8** %tmp2
    941   br i1 undef, label %bb2, label %bb3
    942 
    943 bb2:
          ; The select is the only use of %tmp1, and its result %tmp50 is never used.
    944   %tmp50 = select i1 undef, i8** %tmp2, i8** %tmp1
    945   br i1 undef, label %bb3, label %bb4
    946 
    947 bb3:
    948   unreachable
    949 
    950 bb4:
    951   unreachable
    952 }
    953 
    954 define double @PR13969(double %x) {
    955 ; Check that we detect when promotion will un-escape an alloca and iterate to
    956 ; re-try running SROA over that alloca. Without that, the two allocas that are
    957 ; stored into a dead alloca don't get rewritten and promoted.
    958 ; CHECK-LABEL: @PR13969(
    959 
    960 entry:
    961   %a = alloca double
    962   %b = alloca double*
    963   %c = alloca double
    964 ; CHECK-NOT: alloca
    965 
          ; %b only ever receives stores (the addresses of %c and %a); it is never
          ; loaded from.
    966   store double %x, double* %a
    967   store double* %c, double** %b
    968   store double* %a, double** %b
    969   store double %x, double* %c
    970   %ret = load double* %a
    971 ; CHECK-NOT: store
    972 ; CHECK-NOT: load
    973 
    974   ret double %ret
    975 ; CHECK: ret double %x
    976 }
    977 
    978 %PR14034.struct = type { { {} }, i32, %PR14034.list } ; First member is an empty nested struct.
    979 %PR14034.list = type { %PR14034.list*, %PR14034.list* } ; Two pointers to the same list type.
    980 
    981 define void @PR14034() {
    982 ; This test case tries to form GEPs into the empty leading struct members, and
    983 ; subsequently crashed (under valgrind) before we fixed the PR. The important
    984 ; thing is to handle empty structs gracefully.
    985 ; CHECK-LABEL: @PR14034(
    986 
    987 entry:
    988   %a = alloca %PR14034.struct
    989   %list = getelementptr %PR14034.struct* %a, i32 0, i32 2
    990   %prev = getelementptr %PR14034.list* %list, i32 0, i32 1
    991   store %PR14034.list* undef, %PR14034.list** %prev
    992   %cast0 = bitcast %PR14034.struct* undef to i8*
    993   %cast1 = bitcast %PR14034.struct* %a to i8*
          ; Copy 12 bytes out of %a to an undef destination pointer.
    994   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i32 0, i1 false)
    995   ret void
    996 }
    997 
    998 define i32 @test22(i32 %x) {
    999 ; Test that SROA and promotion is not confused by a grab bag mixture of pointer
   1000 ; types involving wrapper aggregates and zero-length aggregate members.
   1001 ; CHECK-LABEL: @test22(
   1002 
   1003 entry:
   1004   %a1 = alloca { { [1 x { i32 }] } }
   1005   %a2 = alloca { {}, { float }, [0 x i8] }
   1006   %a3 = alloca { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }
   1007 ; CHECK-NOT: alloca
   1008 
          ; Store %x into %a1 wrapped as [1 x { i32 }], then reload it through a
          ; float-typed view of the same aggregate.
   1009   %wrap1 = insertvalue [1 x { i32 }] undef, i32 %x, 0, 0
   1010   %gep1 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0, i32 0
   1011   store [1 x { i32 }] %wrap1, [1 x { i32 }]* %gep1
   1012 
   1013   %gep2 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0
   1014   %ptrcast1 = bitcast { [1 x { i32 }] }* %gep2 to { [1 x { float }] }*
   1015   %load1 = load { [1 x { float }] }* %ptrcast1
   1016   %unwrap1 = extractvalue { [1 x { float }] } %load1, 0, 0
   1017 
          ; Pass the value through %a2 and read it back as <4 x i8>.
   1018   %wrap2 = insertvalue { {}, { float }, [0 x i8] } undef, { float } %unwrap1, 1
   1019   store { {}, { float }, [0 x i8] } %wrap2, { {}, { float }, [0 x i8] }* %a2
   1020 
   1021   %gep3 = getelementptr { {}, { float }, [0 x i8] }* %a2, i32 0, i32 1, i32 0
   1022   %ptrcast2 = bitcast float* %gep3 to <4 x i8>*
   1023   %load3 = load <4 x i8>* %ptrcast2
   1024   %valcast1 = bitcast <4 x i8> %load3 to i32
   1025 
          ; Pass the value through %a3 and read it back as a float struct member.
   1026   %wrap3 = insertvalue [1 x [1 x i32]] undef, i32 %valcast1, 0, 0
   1027   %wrap4 = insertvalue { [1 x [1 x i32]], {} } undef, [1 x [1 x i32]] %wrap3, 0
   1028   %gep4 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1
   1029   %ptrcast3 = bitcast { [0 x double], [1 x [1 x <4 x i8>]], {} }* %gep4 to { [1 x [1 x i32]], {} }*
   1030   store { [1 x [1 x i32]], {} } %wrap4, { [1 x [1 x i32]], {} }* %ptrcast3
   1031 
   1032   %gep5 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1, i32 1, i32 0
   1033   %ptrcast4 = bitcast [1 x <4 x i8>]* %gep5 to { {}, float, {} }*
   1034   %load4 = load { {}, float, {} }* %ptrcast4
   1035   %unwrap2 = extractvalue { {}, float, {} } %load4, 1
   1036   %valcast2 = bitcast float %unwrap2 to i32
   1037 
   1038   ret i32 %valcast2
   1039 ; CHECK: ret i32
   1040 }
   1041 
   1042 define void @PR14059.1(double* %d) {
   1043 ; In PR14059 a peculiar construct was identified as something that is used
   1044 ; pervasively in ARM's ABI-calling-convention lowering: the passing of a struct
   1045 ; of doubles via an array of i32 in order to place the data into integer
   1046 ; registers. This in turn was missed as an optimization by SROA due to the
   1047 ; partial loads and stores of integers to the double alloca we were trying to
   1048 ; form and promote. The solution is to widen the integer operations to be
   1049 ; whole-alloca operations, and perform the appropriate bitcasting on the
   1050 ; *values* rather than the pointers. When this works, partial reads and writes
   1051 ; via integers can be promoted away.
        ; The function label is matched with a label directive, like the other tests
        ; in this file, so the directives below only apply to this function's output.
   1052 ; CHECK-LABEL: @PR14059.1(
   1053 ; CHECK-NOT: alloca
   1054 ; CHECK: ret void
   1055 
   1056 entry:
   1057   %X.sroa.0.i = alloca double, align 8
   1058   %0 = bitcast double* %X.sroa.0.i to i8*
   1059   call void @llvm.lifetime.start(i64 -1, i8* %0)
   1060 
   1061   ; Store to the low 32-bits...
   1062   %X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32*
   1063   store i32 0, i32* %X.sroa.0.0.cast2.i, align 8
   1064 
   1065   ; Also use a memset to the middle 32-bits for fun.
   1066   %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8* %0, i32 2
   1067   call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i32 1, i1 false)
   1068 
   1069   ; Or a memset of the whole thing.
   1070   call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
   1071 
   1072   ; Write to the high 32-bits with a memcpy.
   1073   %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
   1074   %d.raw = bitcast double* %d to i8*
   1075   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false)
   1076 
   1077   ; Store to the high 32-bits...
   1078   %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
   1079   store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
   1080 
   1081   ; Do the actual math...
   1082   %X.sroa.0.0.load1.i = load double* %X.sroa.0.i, align 8
   1083   %accum.real.i = load double* %d, align 8
   1084   %add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
   1085   store double %add.r.i, double* %d, align 8
   1086   call void @llvm.lifetime.end(i64 -1, i8* %0)
   1087   ret void
   1088 }
   1089 
   1090 define i64 @PR14059.2({ float, float }* %phi) {
   1091 ; Check that SROA can split up alloca-wide integer loads and stores where the
   1092 ; underlying alloca has smaller components that are accessed independently. This
   1093 ; shows up particularly with ABI lowering patterns coming out of Clang that rely
   1094 ; on the particular register placement of a single large integer return value.
        ; The function label is matched with a label directive, like the other tests
        ; in this file, so the directives below only apply to this function's output.
   1095 ; CHECK-LABEL: @PR14059.2(
   1096 
   1097 entry:
   1098   %retval = alloca { float, float }, align 4
   1099   ; CHECK-NOT: alloca
   1100 
          ; Zero the whole { float, float } alloca through an i64 view.
   1101   %0 = bitcast { float, float }* %retval to i64*
   1102   store i64 0, i64* %0
   1103   ; CHECK-NOT: store
   1104 
   1105   %phi.realp = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
   1106   %phi.real = load float* %phi.realp
   1107   %phi.imagp = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
   1108   %phi.imag = load float* %phi.imagp
   1109   ; CHECK:      %[[realp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
   1110   ; CHECK-NEXT: %[[real:.*]] = load float* %[[realp]]
   1111   ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
   1112   ; CHECK-NEXT: %[[imag:.*]] = load float* %[[imagp]]
   1113 
          ; Overwrite each float member separately with the values loaded from %phi.
   1114   %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
   1115   %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
   1116   store float %phi.real, float* %real
   1117   store float %phi.imag, float* %imag
   1118   ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
   1119   ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
   1120   ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
   1121   ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
   1122   ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
   1123   ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
   1124   ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
   1125   ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
   1126   ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
   1127 
          ; Read both members back as one i64 and return it.
   1128   %1 = load i64* %0, align 1
   1129   ret i64 %1
   1130   ; CHECK-NEXT: ret i64 %[[real_insert]]
   1131 }
   1132 
   1133 define void @PR14105({ [16 x i8] }* %ptr) {
   1134 ; Ensure that when rewriting the GEP index '-1' for this alloca we preserve its
   1135 ; sign as negative. We use a volatile memcpy to ensure promotion never actually
   1136 ; occurs.
   1137 ; CHECK-LABEL: @PR14105(
   1138 
   1139 entry:
   1140   %a = alloca { [16 x i8] }, align 8
   1141 ; CHECK: alloca [16 x i8], align 8
   1142 
   1143   %gep = getelementptr inbounds { [16 x i8] }* %ptr, i64 -1
   1144 ; CHECK-NEXT: getelementptr inbounds { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0
   1145 
   1146   %cast1 = bitcast { [16 x i8 ] }* %gep to i8*
   1147   %cast2 = bitcast { [16 x i8 ] }* %a to i8*
          ; Volatile 16-byte copy from the alloca to the struct just before %ptr.
   1148   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
   1149   ret void
   1150 ; CHECK: ret
   1151 }
   1152 
   1153 define void @PR14465() {
   1154 ; Ensure that we don't crash when analyzing an alloca larger than the maximum
   1155 ; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1).
   1156 ; CHECK-LABEL: @PR14465(
   1157 
   1158   %stack = alloca [1048576 x i32], align 16
   1159 ; CHECK: alloca [1048576 x i32]
   1160   %cast = bitcast [1048576 x i32]* %stack to i8*
          ; The memset length, 4194304 bytes, equals 1048576 elements of 4 bytes each.
   1161   call void @llvm.memset.p0i8.i64(i8* %cast, i8 -2, i64 4194304, i32 16, i1 false)
   1162   ret void
   1163 ; CHECK: ret
   1164 }
   1165 
   1166 define void @PR14548(i1 %x) {
   1167 ; Handle a mixture of i1 and i8 loads and stores to allocas. This particular
   1168 ; pattern caused crashes and invalid output in the PR, and its nature will
   1169 ; trigger a mixture in several permutations as we resolve each alloca
   1170 ; iteratively.
   1171 ; Note that we don't do a particularly good *job* of handling these mixtures,
   1172 ; but the hope is that this is very rare.
   1173 ; CHECK-LABEL: @PR14548(
   1174 
   1175 entry:
   1176   %a = alloca <{ i1 }>, align 8
   1177   %b = alloca <{ i1 }>, align 8
   1178 ; CHECK:      %[[a:.*]] = alloca i8, align 8
   1179 
          ; Store %x into %b as an i1, then read the same location back as an i8.
   1180   %b.i1 = bitcast <{ i1 }>* %b to i1*
   1181   store i1 %x, i1* %b.i1, align 8
   1182   %b.i8 = bitcast <{ i1 }>* %b to i8*
   1183   %foo = load i8* %b.i8, align 1
   1184 ; CHECK-NEXT: {{.*}} = zext i1 %x to i8
   1185 ; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
   1186 ; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
   1187 ; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8
   1188 
          ; Copy one byte from %b into %a, then read %a both as an i8 and as an i1.
   1189   %a.i8 = bitcast <{ i1 }>* %a to i8*
   1190   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
   1191   %bar = load i8* %a.i8, align 1
   1192   %a.i1 = getelementptr inbounds <{ i1 }>* %a, i32 0, i32 0
   1193   %baz = load i1* %a.i1, align 1
   1194 ; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1*
   1195 ; CHECK-NEXT: {{.*}} = load i1* %[[a_cast]], align 8
   1196 
   1197   ret void
   1198 }
   1199 
   1200 define <3 x i8> @PR14572.1(i32 %x) {
   1201 ; Ensure that a split integer store which is wider than the type size of the
   1202 ; alloca (relying on the alloc size padding) doesn't trigger an assert.
        ; The function label is matched with a label directive, like the other tests
        ; in this file, so the directives below only apply to this function's output.
   1203 ; CHECK-LABEL: @PR14572.1(
   1204 
   1205 entry:
   1206   %a = alloca <3 x i8>, align 4
   1207 ; CHECK-NOT: alloca
   1208 
          ; Store a full i32 through a cast of the <3 x i8> alloca, then reload it with
          ; its own vector type.
   1209   %cast = bitcast <3 x i8>* %a to i32*
   1210   store i32 %x, i32* %cast, align 1
   1211   %y = load <3 x i8>* %a, align 4
   1212   ret <3 x i8> %y
   1213 ; CHECK: ret <3 x i8>
   1214 }
   1215 
   1216 define i32 @PR14572.2(<3 x i8> %x) {
   1217 ; Ensure that a split integer load which is wider than the type size of the
   1218 ; alloca (relying on the alloc size padding) doesn't trigger an assert.
        ; The function label is matched with a label directive, like the other tests
        ; in this file, so the directives below only apply to this function's output.
   1219 ; CHECK-LABEL: @PR14572.2(
   1220 
   1221 entry:
   1222   %a = alloca <3 x i8>, align 4
   1223 ; CHECK-NOT: alloca
   1224 
          ; Store the vector with its own type, then reload a full i32 through a cast
          ; of the <3 x i8> alloca.
   1225   store <3 x i8> %x, <3 x i8>* %a, align 1
   1226   %cast = bitcast <3 x i8>* %a to i32*
   1227   %y = load i32* %cast, align 4
   1228   ret i32 %y
   1229 ; CHECK: ret i32
   1230 }
   1231 
   1232 define i32 @PR14601(i32 %x) {
   1233 ; Don't try to form a promotable integer alloca when there is a variable length
   1234 ; memory intrinsic.
   1235 ; CHECK-LABEL: @PR14601(
   1236 
   1237 entry:
   1238   %a = alloca i32
   1239 ; CHECK: alloca
   1240 
   1241   %a.i8 = bitcast i32* %a to i8*
          ; The memset length is the runtime value %x rather than a constant.
   1242   call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i32 1, i1 false)
   1243   %v = load i32* %a
   1244   ret i32 %v
   1245 }
   1246 
   1247 define void @PR15674(i8* %data, i8* %src, i32 %size) {
   1248 ; Arrange (via control flow) to have unmerged stores of a particular width to
   1249 ; an alloca where we incrementally store from the end of the array toward the
   1250 ; beginning of the array. Ensure that the final integer store, despite being
   1251 ; convertible to the integer type that we end up promoting this alloca toward,
   1252 ; doesn't get widened to a full alloca store.
   1253 ; CHECK-LABEL: @PR15674(
   1254 
   1255 entry:
   1256   %tmp = alloca [4 x i8], align 1
   1257 ; CHECK: alloca i32
   1258 
          ; Enter the fall-through chain bb4 -> bb3 -> bb2 -> bb1 at the block matching
          ; %size, so that exactly %size bytes get copied; other sizes skip to %end.
   1259   switch i32 %size, label %end [
   1260     i32 4, label %bb4
   1261     i32 3, label %bb3
   1262     i32 2, label %bb2
   1263     i32 1, label %bb1
   1264   ]
   1265 
   1266 bb4:
   1267   %src.gep3 = getelementptr inbounds i8* %src, i32 3
   1268   %src.3 = load i8* %src.gep3
   1269   %tmp.gep3 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 3
   1270   store i8 %src.3, i8* %tmp.gep3
   1271 ; CHECK: store i8
   1272 
   1273   br label %bb3
   1274 
   1275 bb3:
   1276   %src.gep2 = getelementptr inbounds i8* %src, i32 2
   1277   %src.2 = load i8* %src.gep2
   1278   %tmp.gep2 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 2
   1279   store i8 %src.2, i8* %tmp.gep2
   1280 ; CHECK: store i8
   1281 
   1282   br label %bb2
   1283 
   1284 bb2:
   1285   %src.gep1 = getelementptr inbounds i8* %src, i32 1
   1286   %src.1 = load i8* %src.gep1
   1287   %tmp.gep1 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 1
   1288   store i8 %src.1, i8* %tmp.gep1
   1289 ; CHECK: store i8
   1290 
   1291   br label %bb1
   1292 
   1293 bb1:
   1294   %src.gep0 = getelementptr inbounds i8* %src, i32 0
   1295   %src.0 = load i8* %src.gep0
   1296   %tmp.gep0 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 0
   1297   store i8 %src.0, i8* %tmp.gep0
   1298 ; CHECK: store i8
   1299 
   1300   br label %end
   1301 
   1302 end:
   1303   %tmp.raw = bitcast [4 x i8]* %tmp to i8*
   1304   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i32 1, i1 false)
   1305   ret void
   1306 ; CHECK: ret void
   1307 }
   1308 
   1309 define void @PR15805(i1 %a, i1 %b) {
        ; Every arm of the two nested selects below resolves to the single alloca %c,
        ; and the load reads through the outer select. Neither parameter is used. The
        ; expected output has no alloca left.
   1310 ; CHECK-LABEL: @PR15805(
   1311 ; CHECK-NOT: alloca
   1312 ; CHECK: ret void
   1313 
   1314   %c = alloca i64, align 8
   1315   %p.0.c = select i1 undef, i64* %c, i64* %c
   1316   %cond.in = select i1 undef, i64* %p.0.c, i64* %c
   1317   %cond = load i64* %cond.in, align 8
   1318   ret void
   1319 }
   1320 
   1321 define void @PR16651.1(i8* %a) {
   1322 ; This test case caused a crash due to the volatile memcpy in combination with
   1323 ; lowering to integer loads and stores of a width other than that of the original
   1324 ; memcpy.
   1325 ;
   1326 ; CHECK-LABEL: @PR16651.1(
   1327 ; CHECK: alloca i16
   1328 ; CHECK: alloca i8
   1329 ; CHECK: alloca i8
   1330 ; CHECK: unreachable
   1331 
   1332 entry:
   1333   %b = alloca i32, align 4
   1334   %b.cast = bitcast i32* %b to i8*
   1335   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b.cast, i8* %a, i32 4, i32 4, i1 true)
          ; Unnamed single-byte load at byte offset 2 of %b.
   1336   %b.gep = getelementptr inbounds i8* %b.cast, i32 2
   1337   load i8* %b.gep, align 2
   1338   unreachable
   1339 }
   1340 
   1341 define void @PR16651.2() {
   1342 ; This test case caused a crash due to failing to promote given a select that
   1343 ; can't be speculated. It shouldn't be promoted, but we missed that fact when
   1344 ; analyzing whether we could form a vector promotion because that code didn't
   1345 ; bail on select instructions.
   1346 ;
   1347 ; CHECK-LABEL: @PR16651.2(
   1348 ; CHECK: alloca <2 x float>
   1349 ; CHECK: ret void
   1350 
   1351 entry:
   1352   %tv1 = alloca { <2 x float>, <2 x float> }, align 8
   1353   %0 = getelementptr { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1
   1354   store <2 x float> undef, <2 x float>* %0, align 8
   1355   %1 = getelementptr inbounds { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1, i64 0
          ; One arm of the select is null; the other points at the first float of the
          ; second vector member of %tv1.
   1356   %cond105.in.i.i = select i1 undef, float* null, float* %1
   1357   %cond105.i.i = load float* %cond105.in.i.i, align 8
   1358   ret void
   1359 }
   1360