Home | History | Annotate | Download | only in SROA
      1 ; RUN: opt < %s -sroa -S | FileCheck %s
      2 ; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
      3 
      4 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
        ; Layout notes: "e" selects little-endian; default pointers are 64-bit while
        ; address space 1 pointers are 16-bit; native integer widths are 8/16/32/64.
      5 
      6 declare void @llvm.lifetime.start(i64, i8* nocapture) ; operands: size, pointer
      7 declare void @llvm.lifetime.end(i64, i8* nocapture) ; operands: size, pointer
      8 
      9 define i32 @test0() {
        ; Two independent scalar allocas (an i32 and a float), each bracketed by
        ; lifetime.start/lifetime.end markers, are stored to and immediately loaded
        ; back. The expectations below require that no alloca survives and that the
        ; function still ends in a "ret i32".
     10 ; CHECK-LABEL: @test0(
     11 ; CHECK-NOT: alloca
     12 ; CHECK: ret i32
     13 
     14 entry:
     15   %a1 = alloca i32
     16   %a2 = alloca float
     17 
     18   %a1.i8 = bitcast i32* %a1 to i8*
     19   call void @llvm.lifetime.start(i64 4, i8* %a1.i8)
     20 
     21   store i32 0, i32* %a1
     22   %v1 = load i32* %a1
     23 
     24   call void @llvm.lifetime.end(i64 4, i8* %a1.i8)
     25 
     26   %a2.i8 = bitcast float* %a2 to i8*
     27   call void @llvm.lifetime.start(i64 4, i8* %a2.i8)
     28 
     29   store float 0.0, float* %a2
     30   %v2 = load float * %a2
     31   %v2.int = bitcast float %v2 to i32
     32   %sum1 = add i32 %v1, %v2.int
     33 
     34   call void @llvm.lifetime.end(i64 4, i8* %a2.i8)
     35 
     36   ret i32 %sum1
     37 }
     38 
     39 define i32 @test1() {
        ; Stores 0 into the first field of a { i32, float } alloca through a GEP and
        ; loads it straight back. The expectations require the alloca to disappear
        ; and the stored constant to be returned directly.
     40 ; CHECK-LABEL: @test1(
     41 ; CHECK-NOT: alloca
     42 ; CHECK: ret i32 0
     43 
     44 entry:
     45   %X = alloca { i32, float }
     46   %Y = getelementptr { i32, float }* %X, i64 0, i32 0
     47   store i32 0, i32* %Y
     48   %Z = load i32* %Y
     49   ret i32 %Z
     50 }
     51 
     52 define i64 @test2(i64 %X) {
        ; An [8 x i8] alloca is accessed as a single i64 through a bitcast. The store
        ; happens in the entry block and the load in a separate block (L2), so the
        ; value has to be forwarded across a branch. The expectations require that
        ; the argument %X is returned directly with no alloca left.
     53 ; CHECK-LABEL: @test2(
     54 ; CHECK-NOT: alloca
     55 ; CHECK: ret i64 %X
     56 
     57 entry:
     58   %A = alloca [8 x i8]
     59   %B = bitcast [8 x i8]* %A to i64*
     60   store i64 %X, i64* %B
     61   br label %L2
     62 
     63 L2:
     64   %Z = load i64* %B
     65   ret i64 %Z
     66 }
     67 
     68 define void @test3(i8* %dst, i8* %src) {
        ; A 300-byte array is filled from %src, overwritten by a single-byte store,
        ; by several runs of overlapping stores, by a memset and by further memcpys,
        ; and is finally copied out to %dst. The expectations require the array to
        ; be split into seven smaller allocas of 42, 99, 16, 42, 7, 7 and 85 bytes,
        ; with the lone bytes at offsets 42 and 207 handled as plain i8 values
        ; (loaded from %src, stored to %dst) instead of living in an alloca.
     69 ; CHECK-LABEL: @test3(
     70 
     71 entry:
     72   %a = alloca [300 x i8]
     73 ; CHECK-NOT:  alloca
     74 ; CHECK:      %[[test3_a1:.*]] = alloca [42 x i8]
     75 ; CHECK-NEXT: %[[test3_a2:.*]] = alloca [99 x i8]
     76 ; CHECK-NEXT: %[[test3_a3:.*]] = alloca [16 x i8]
     77 ; CHECK-NEXT: %[[test3_a4:.*]] = alloca [42 x i8]
     78 ; CHECK-NEXT: %[[test3_a5:.*]] = alloca [7 x i8]
     79 ; CHECK-NEXT: %[[test3_a6:.*]] = alloca [7 x i8]
     80 ; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8]
     81 
     82   %b = getelementptr [300 x i8]* %a, i64 0, i64 0
     83   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i32 1, i1 false)
     84 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
     85 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 42
     86 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
     87 ; CHECK-NEXT: %[[test3_r1:.*]] = load i8* %[[gep]]
     88 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
     89 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
     90 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
     91 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 142
     92 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
     93 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
     94 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 158
     95 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
     96 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
     97 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 200
     98 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
     99 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    100 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 207
    101 ; CHECK-NEXT: %[[test3_r2:.*]] = load i8* %[[gep]]
    102 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 208
    103 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
    104 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    105 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 215
    106 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
    107 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
    108 
    109   ; Clobber a single element of the array; this should be promotable.
    110   %c = getelementptr [300 x i8]* %a, i64 0, i64 42
    111   store i8 0, i8* %c
    112 
    113   ; Make a sequence of overlapping stores to the array. These overlap both in
    114   ; forward strides and in shrinking accesses.
    115   %overlap.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 142
    116   %overlap.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 143
    117   %overlap.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 144
    118   %overlap.4.i8 = getelementptr [300 x i8]* %a, i64 0, i64 145
    119   %overlap.5.i8 = getelementptr [300 x i8]* %a, i64 0, i64 146
    120   %overlap.6.i8 = getelementptr [300 x i8]* %a, i64 0, i64 147
    121   %overlap.7.i8 = getelementptr [300 x i8]* %a, i64 0, i64 148
    122   %overlap.8.i8 = getelementptr [300 x i8]* %a, i64 0, i64 149
    123   %overlap.9.i8 = getelementptr [300 x i8]* %a, i64 0, i64 150
    124   %overlap.1.i16 = bitcast i8* %overlap.1.i8 to i16*
    125   %overlap.1.i32 = bitcast i8* %overlap.1.i8 to i32*
    126   %overlap.1.i64 = bitcast i8* %overlap.1.i8 to i64*
    127   %overlap.2.i64 = bitcast i8* %overlap.2.i8 to i64*
    128   %overlap.3.i64 = bitcast i8* %overlap.3.i8 to i64*
    129   %overlap.4.i64 = bitcast i8* %overlap.4.i8 to i64*
    130   %overlap.5.i64 = bitcast i8* %overlap.5.i8 to i64*
    131   %overlap.6.i64 = bitcast i8* %overlap.6.i8 to i64*
    132   %overlap.7.i64 = bitcast i8* %overlap.7.i8 to i64*
    133   %overlap.8.i64 = bitcast i8* %overlap.8.i8 to i64*
    134   %overlap.9.i64 = bitcast i8* %overlap.9.i8 to i64*
    135   store i8 1, i8* %overlap.1.i8
    136 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
    137 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
    138   store i16 1, i16* %overlap.1.i16
    139 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i16*
    140 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
    141   store i32 1, i32* %overlap.1.i32
    142 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i32*
    143 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
    144   store i64 1, i64* %overlap.1.i64
    145 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i64*
    146 ; CHECK-NEXT: store i64 1, i64* %[[bitcast]]
    147   store i64 2, i64* %overlap.2.i64
    148 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 1
    149 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    150 ; CHECK-NEXT: store i64 2, i64* %[[bitcast]]
    151   store i64 3, i64* %overlap.3.i64
    152 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 2
    153 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    154 ; CHECK-NEXT: store i64 3, i64* %[[bitcast]]
    155   store i64 4, i64* %overlap.4.i64
    156 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 3
    157 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    158 ; CHECK-NEXT: store i64 4, i64* %[[bitcast]]
    159   store i64 5, i64* %overlap.5.i64
    160 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 4
    161 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    162 ; CHECK-NEXT: store i64 5, i64* %[[bitcast]]
    163   store i64 6, i64* %overlap.6.i64
    164 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 5
    165 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    166 ; CHECK-NEXT: store i64 6, i64* %[[bitcast]]
    167   store i64 7, i64* %overlap.7.i64
    168 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 6
    169 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    170 ; CHECK-NEXT: store i64 7, i64* %[[bitcast]]
    171   store i64 8, i64* %overlap.8.i64
    172 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 7
    173 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    174 ; CHECK-NEXT: store i64 8, i64* %[[bitcast]]
    175   store i64 9, i64* %overlap.9.i64
    176 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 8
    177 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
    178 ; CHECK-NEXT: store i64 9, i64* %[[bitcast]]
    179 
    180   ; Make two sequences of overlapping stores with more gaps and irregularities.
    181   %overlap2.1.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 200
    182   %overlap2.1.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 201
    183   %overlap2.1.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 202
    184   %overlap2.1.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 203
    185 
    186   %overlap2.2.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 208
    187   %overlap2.2.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 209
    188   %overlap2.2.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 210
    189   %overlap2.2.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 211
    190 
    191   %overlap2.1.0.i16 = bitcast i8* %overlap2.1.0.i8 to i16*
    192   %overlap2.1.0.i32 = bitcast i8* %overlap2.1.0.i8 to i32*
    193   %overlap2.1.1.i32 = bitcast i8* %overlap2.1.1.i8 to i32*
    194   %overlap2.1.2.i32 = bitcast i8* %overlap2.1.2.i8 to i32*
    195   %overlap2.1.3.i32 = bitcast i8* %overlap2.1.3.i8 to i32*
    196   store i8 1,  i8*  %overlap2.1.0.i8
    197 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
    198 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
    199   store i16 1, i16* %overlap2.1.0.i16
    200 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i16*
    201 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
    202   store i32 1, i32* %overlap2.1.0.i32
    203 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i32*
    204 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
    205   store i32 2, i32* %overlap2.1.1.i32
    206 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 1
    207 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    208 ; CHECK-NEXT: store i32 2, i32* %[[bitcast]]
    209   store i32 3, i32* %overlap2.1.2.i32
    210 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
    211 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    212 ; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
    213   store i32 4, i32* %overlap2.1.3.i32
    214 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 3
    215 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    216 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
    217 
    218   %overlap2.2.0.i32 = bitcast i8* %overlap2.2.0.i8 to i32*
    219   %overlap2.2.1.i16 = bitcast i8* %overlap2.2.1.i8 to i16*
    220   %overlap2.2.1.i32 = bitcast i8* %overlap2.2.1.i8 to i32*
    221   %overlap2.2.2.i32 = bitcast i8* %overlap2.2.2.i8 to i32*
    222   %overlap2.2.3.i32 = bitcast i8* %overlap2.2.3.i8 to i32*
    223   store i32 1, i32* %overlap2.2.0.i32
    224 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a6]] to i32*
    225 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
    226   store i8 1,  i8*  %overlap2.2.1.i8
    227 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
    228 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
    229   store i16 1, i16* %overlap2.2.1.i16
    230 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
    231 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    232 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
    233   store i32 1, i32* %overlap2.2.1.i32
    234 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
    235 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    236 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
    237   store i32 3, i32* %overlap2.2.2.i32
    238 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
    239 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    240 ; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
    241   store i32 4, i32* %overlap2.2.3.i32
    242 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 3
    243 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
    244 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
    245 
          ; Offset 201 - 4 = 197, so this 8-byte copy starts 3 bytes before the
          ; first overlap region (offset 200) and runs 5 bytes into it.
    246   %overlap2.prefix = getelementptr i8* %overlap2.1.1.i8, i64 -4
    247   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i32 1, i1 false)
    248 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 39
    249 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 3
    250 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 3
    251 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
    252 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 5
    253 
    254   ; Bridge between the overlapping areas
    255   call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i32 1, i1 false)
    256 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
    257 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 5
    258 ; ...promoted i8 store...
    259 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
    260 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 2
    261 
    262   ; Entirely within the second overlap.
    263   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i32 1, i1 false)
    264 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
    265 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
    266 
    267   ; Trailing past the second overlap.
    268   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i32 1, i1 false)
    269 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
    270 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
    271 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 5
    272 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
    273 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 3
    274 
    275   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i32 1, i1 false)
    276 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
    277 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 42
    278 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
    279 ; CHECK-NEXT: store i8 0, i8* %[[gep]]
    280 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
    281 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
    282 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
    283 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 142
    284 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
    285 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
    286 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 158
    287 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
    288 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
    289 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 200
    290 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
    291 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    292 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 207
    293 ; CHECK-NEXT: store i8 42, i8* %[[gep]]
    294 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 208
    295 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
    296 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    297 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 215
    298 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
    299 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
    300 
    301   ret void
    302 }
    303 
    304 define void @test4(i8* %dst, i8* %src) {
        ; A 100-byte array is filled from %src, then 10-byte ranges are copied inside
        ; the array itself (offset 20 -> 40 with memcpy, offset 50 -> 40 with
        ; memmove), and the whole array is finally copied to %dst. The expectations
        ; require six smaller allocas of 20, 7, 10, 7, 7 and 40 bytes; the leading
        ; i16 + i8 pieces of each 10-byte range (offsets 20, 40 and 50) are carried
        ; as loaded values instead of memory, so only the 7-byte tails are copied
        ; between allocas.
    305 ; CHECK-LABEL: @test4(
    306 
    307 entry:
    308   %a = alloca [100 x i8]
    309 ; CHECK-NOT:  alloca
    310 ; CHECK:      %[[test4_a1:.*]] = alloca [20 x i8]
    311 ; CHECK-NEXT: %[[test4_a2:.*]] = alloca [7 x i8]
    312 ; CHECK-NEXT: %[[test4_a3:.*]] = alloca [10 x i8]
    313 ; CHECK-NEXT: %[[test4_a4:.*]] = alloca [7 x i8]
    314 ; CHECK-NEXT: %[[test4_a5:.*]] = alloca [7 x i8]
    315 ; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8]
    316 
    317   %b = getelementptr [100 x i8]* %a, i64 0, i64 0
    318   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i32 1, i1 false)
    319 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
    320 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 20
    321 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 20
    322 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    323 ; CHECK-NEXT: %[[test4_r1:.*]] = load i16* %[[bitcast]]
    324 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 22
    325 ; CHECK-NEXT: %[[test4_r2:.*]] = load i8* %[[gep]]
    326 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 23
    327 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
    328 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    329 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 30
    330 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
    331 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
    332 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 40
    333 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    334 ; CHECK-NEXT: %[[test4_r3:.*]] = load i16* %[[bitcast]]
    335 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
    336 ; CHECK-NEXT: %[[test4_r4:.*]] = load i8* %[[gep]]
    337 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
    338 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
    339 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    340 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 50
    341 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    342 ; CHECK-NEXT: %[[test4_r5:.*]] = load i16* %[[bitcast]]
    343 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 52
    344 ; CHECK-NEXT: %[[test4_r6:.*]] = load i8* %[[gep]]
    345 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 53
    346 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
    347 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    348 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 60
    349 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
    350 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
    351 
    352   %a.src.1 = getelementptr [100 x i8]* %a, i64 0, i64 20
    353   %a.dst.1 = getelementptr [100 x i8]* %a, i64 0, i64 40
    354   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i32 1, i1 false)
    355 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
    356 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
    357 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    358 
    359   ; Clobber a single element of the array; this should be promotable and be deleted.
    360   %c = getelementptr [100 x i8]* %a, i64 0, i64 42
    361   store i8 0, i8* %c
    362 
    363   %a.src.2 = getelementptr [100 x i8]* %a, i64 0, i64 50
    364   call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i32 1, i1 false)
    365 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
    366 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
    367 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    368 
    369   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i32 1, i1 false)
    370 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
    371 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 20
    372 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 20
    373 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    374 ; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]]
    375 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 22
    376 ; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]]
    377 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 23
    378 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
    379 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    380 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 30
    381 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
    382 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
    383 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 40
    384 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    385 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
    386 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
    387 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
    388 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
    389 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
    390 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    391 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 50
    392 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
    393 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
    394 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 52
    395 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
    396 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 53
    397 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
    398 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
    399 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 60
    400 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
    401 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
    402 
    403   ret void
    404 }
    405 
    406 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind ; operands: dest, src, length, alignment, is-volatile
    407 declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) nounwind ; same, but dest is in address space 1
    408 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind ; operands: dest, src, length, alignment, is-volatile
    409 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind ; operands: dest, fill byte, length, alignment, is-volatile
    410 
    411 define i16 @test5() {
        ; Stores a float 0.0 over a [4 x i8] alloca and then loads an i16 from byte
        ; offset 2 of the same storage. The expectations require the i16 to be
        ; extracted from the float's bits (bitcast to i32, lshr by 16, trunc)
        ; rather than going through memory.
    412 ; CHECK-LABEL: @test5(
    413 ; CHECK-NOT: alloca float
    414 ; CHECK:      %[[cast:.*]] = bitcast float 0.0{{.*}} to i32
    415 ; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16
    416 ; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16
    417 ; CHECK-NEXT: ret i16 %[[trunc]]
    418 
    419 entry:
    420   %a = alloca [4 x i8]
    421   %fptr = bitcast [4 x i8]* %a to float*
    422   store float 0.0, float* %fptr
    423   %ptr = getelementptr [4 x i8]* %a, i32 0, i32 2
    424   %iptr = bitcast i8* %ptr to i16*
    425   %val = load i16* %iptr
    426   ret i16 %val
    427 }
    428 
    429 define i32 @test6() {
        ; A volatile memset (final operand i1 true) fills a 4-byte alloca, which is
        ; then read back as an i32. The expectations require an i32 alloca to remain,
        ; written by a volatile i32 store and read by an ordinary load.
    430 ; CHECK-LABEL: @test6(
    431 ; CHECK: alloca i32
    432 ; CHECK-NEXT: store volatile i32
    433 ; CHECK-NEXT: load i32*
    434 ; CHECK-NEXT: ret i32
    435 
    436 entry:
    437   %a = alloca [4 x i8]
    438   %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
    439   call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i32 1, i1 true)
    440   %iptr = bitcast i8* %ptr to i32*
    441   %val = load i32* %iptr
    442   ret i32 %val
    443 }
    444 
    445 define void @test7(i8* %src, i8* %dst) {
        ; Volatile memcpys (final operand i1 true) copy 4 bytes from %src into an
        ; alloca and from the alloca out to %dst. The expectations require an i32
        ; alloca to remain and each copy to become a volatile i32 load/store pair.
    446 ; CHECK-LABEL: @test7(
    447 ; CHECK: alloca i32
    448 ; CHECK-NEXT: bitcast i8* %src to i32*
    449 ; CHECK-NEXT: load volatile i32*
    450 ; CHECK-NEXT: store volatile i32
    451 ; CHECK-NEXT: bitcast i8* %dst to i32*
    452 ; CHECK-NEXT: load volatile i32*
    453 ; CHECK-NEXT: store volatile i32
    454 ; CHECK-NEXT: ret
    455 
    456 entry:
    457   %a = alloca [4 x i8]
    458   %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
    459   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
    460   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
    461   ret void
    462 }
    463 
    464 
    465 %S1 = type { i32, i32, [16 x i8] } ; two i32 fields followed by a 16-byte array
    466 %S2 = type { %S1*, %S2* } ; pointer to an %S1 plus a pointer to another %S2
    467 
    468 define %S2 @test8(%S2* %s2) {
        ; Loads the second field of %s2 (a %S2 pointer), copies both pointer fields
        ; of that pointee into a local %S2 alloca, then reloads them to build the
        ; returned aggregate. The expectations require no alloca: the insertvalue
        ; operands must be the values loaded through the pointer chain.
    469 ; CHECK-LABEL: @test8(
    470 entry:
    471   %new = alloca %S2
    472 ; CHECK-NOT: alloca
    473 
    474   %s2.next.ptr = getelementptr %S2* %s2, i64 0, i32 1
    475   %s2.next = load %S2** %s2.next.ptr
    476 ; CHECK:      %[[gep:.*]] = getelementptr %S2* %s2, i64 0, i32 1
    477 ; CHECK-NEXT: %[[next:.*]] = load %S2** %[[gep]]
    478 
    479   %s2.next.s1.ptr = getelementptr %S2* %s2.next, i64 0, i32 0
    480   %s2.next.s1 = load %S1** %s2.next.s1.ptr
    481   %new.s1.ptr = getelementptr %S2* %new, i64 0, i32 0
    482   store %S1* %s2.next.s1, %S1** %new.s1.ptr
    483   %s2.next.next.ptr = getelementptr %S2* %s2.next, i64 0, i32 1
    484   %s2.next.next = load %S2** %s2.next.next.ptr
    485   %new.next.ptr = getelementptr %S2* %new, i64 0, i32 1
    486   store %S2* %s2.next.next, %S2** %new.next.ptr
    487 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 0
    488 ; CHECK-NEXT: %[[next_s1:.*]] = load %S1** %[[gep]]
    489 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 1
    490 ; CHECK-NEXT: %[[next_next:.*]] = load %S2** %[[gep]]
    491 
    492   %new.s1 = load %S1** %new.s1.ptr
    493   %result1 = insertvalue %S2 undef, %S1* %new.s1, 0
    494 ; CHECK-NEXT: %[[result1:.*]] = insertvalue %S2 undef, %S1* %[[next_s1]], 0
    495   %new.next = load %S2** %new.next.ptr
    496   %result2 = insertvalue %S2 %result1, %S2* %new.next, 1
    497 ; CHECK-NEXT: %[[result2:.*]] = insertvalue %S2 %[[result1]], %S2* %[[next_next]], 1
    498   ret %S2 %result2
    499 ; CHECK-NEXT: ret %S2 %[[result2]]
    500 }
    501 
    502 define i64 @test9() {
    503 ; Ensure we can handle loads off the end of an alloca even when wrapped in
    504 ; weird bit casts and types. This is valid IR due to the alignment and masking
    505 ; off the bits past the end of the alloca.
    506 ;
        ; The three byte stores (0, 0, 26) are expected to be merged into a single
        ; i64 with zext/shl/and/or sequences, starting from undef for the bytes that
        ; are never written; the final "and" with 16777215 (2^24 - 1) keeps only
        ; the low three bytes.
        ;
    507 ; CHECK-LABEL: @test9(
    508 ; CHECK-NOT: alloca
    509 ; CHECK:      %[[b2:.*]] = zext i8 26 to i64
    510 ; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16
    511 ; CHECK-NEXT: %[[m2:.*]] = and i64 undef, -16711681
    512 ; CHECK-NEXT: %[[i2:.*]] = or i64 %[[m2]], %[[s2]]
    513 ; CHECK-NEXT: %[[b1:.*]] = zext i8 0 to i64
    514 ; CHECK-NEXT: %[[s1:.*]] = shl i64 %[[b1]], 8
    515 ; CHECK-NEXT: %[[m1:.*]] = and i64 %[[i2]], -65281
    516 ; CHECK-NEXT: %[[i1:.*]] = or i64 %[[m1]], %[[s1]]
    517 ; CHECK-NEXT: %[[b0:.*]] = zext i8 0 to i64
    518 ; CHECK-NEXT: %[[m0:.*]] = and i64 %[[i1]], -256
    519 ; CHECK-NEXT: %[[i0:.*]] = or i64 %[[m0]], %[[b0]]
    520 ; CHECK-NEXT: %[[result:.*]] = and i64 %[[i0]], 16777215
    521 ; CHECK-NEXT: ret i64 %[[result]]
    522 
    523 entry:
    524   %a = alloca { [3 x i8] }, align 8
    525   %gep1 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 0
    526   store i8 0, i8* %gep1, align 1
    527   %gep2 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 1
    528   store i8 0, i8* %gep2, align 1
    529   %gep3 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 2
    530   store i8 26, i8* %gep3, align 1
    531   %cast = bitcast { [3 x i8] }* %a to { i64 }*
    532   %elt = getelementptr inbounds { i64 }* %cast, i32 0, i32 0
    533   %load = load i64* %elt
    534   %result = and i64 %load, 16777215
    535   ret i64 %result
    536 }
    537 
    538 define %S2* @test10() {
        ; Zero-fills an 8-byte alloca with memset and then loads it back as an %S2
        ; pointer. The expectations require the load to fold to a null pointer with
        ; no pointer-typed alloca left behind.
    539 ; CHECK-LABEL: @test10(
    540 ; CHECK-NOT: alloca %S2*
    541 ; CHECK: ret %S2* null
    542 
    543 entry:
    544   %a = alloca [8 x i8]
    545   %ptr = getelementptr [8 x i8]* %a, i32 0, i32 0
    546   call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i32 1, i1 false)
    547   %s2ptrptr = bitcast i8* %ptr to %S2**
    548   %s2ptr = load %S2** %s2ptrptr
    549   ret %S2* %s2ptr
    550 }
    551 
    552 define i32 @test11() {
        ; The "good" block accesses the i32 alloca at index 0; the "bad" block
        ; accesses index 1, which is one element past the end of the alloca. The
        ; expectations require the alloca to be removed and a "ret i32 0" to be
        ; present despite the out-of-bounds path.
    553 ; CHECK-LABEL: @test11(
    554 ; CHECK-NOT: alloca
    555 ; CHECK: ret i32 0
    556 
    557 entry:
    558   %X = alloca i32
    559   br i1 undef, label %good, label %bad
    560 
    561 good:
    562   %Y = getelementptr i32* %X, i64 0
    563   store i32 0, i32* %Y
    564   %Z = load i32* %Y
    565   ret i32 %Z
    566 
    567 bad:
    568   %Y2 = getelementptr i32* %X, i64 1
    569   store i32 0, i32* %Y2
    570   %Z2 = load i32* %Y2
    571   ret i32 %Z2
    572 }
    573 
     574 define i8 @test12() {
     575 ; We fully promote these to the i24 load or store size, resulting in just masks
     576 ; and other operations that instcombine will fold, but no alloca.
     577 ;
     578 ; CHECK-LABEL: @test12(
     579 
     580 entry:
     581   %a = alloca [3 x i8]
     582   %b = alloca [3 x i8]
     583 ; CHECK-NOT: alloca
     584 
         ; Fill %a one byte at a time, then read all three bytes back as one i24.
     585   %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
     586   store i8 0, i8* %a0ptr
     587   %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
     588   store i8 0, i8* %a1ptr
     589   %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
     590   store i8 0, i8* %a2ptr
     591   %aiptr = bitcast [3 x i8]* %a to i24*
     592   %ai = load i24* %aiptr
     593 ; CHECK-NOT: store
     594 ; CHECK-NOT: load
     595 ; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
     596 ; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16
     597 ; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535
     598 ; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[shift2]]
     599 ; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
     600 ; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
     601 ; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
     602 ; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
     603 ; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
     604 ; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], -256
     605 ; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[ext0]]
     606 
         ; Write %b as one i24 (the value read from %a), then read it back one byte
         ; at a time.
     607   %biptr = bitcast [3 x i8]* %b to i24*
     608   store i24 %ai, i24* %biptr
     609   %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
     610   %b0 = load i8* %b0ptr
     611   %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
     612   %b1 = load i8* %b1ptr
     613   %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
     614   %b2 = load i8* %b2ptr
     615 ; CHECK-NOT: store
     616 ; CHECK-NOT: load
     617 ; CHECK:      %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
     618 ; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
     619 ; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
     620 ; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[insert0]], 16
     621 ; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
     622 
     623   %bsum0 = add i8 %b0, %b1
     624   %bsum1 = add i8 %bsum0, %b2
     625   ret i8 %bsum1
     626 ; CHECK:      %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
     627 ; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
     628 ; CHECK-NEXT: ret i8 %[[sum1]]
     629 }
    630 
     631 define i32 @test13() {
     632 ; Ensure we don't crash and handle undefined loads that straddle the end of the
     633 ; allocation.
     634 ; CHECK-LABEL: @test13(
     635 ; CHECK:      %[[value:.*]] = zext i8 0 to i16
     636 ; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32
     637 ; CHECK-NEXT: ret i32 %[[ret]]
     638 
     639 entry:
     640   %a = alloca [3 x i8], align 2
     641   %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
     642   store i8 0, i8* %b0ptr
     643   %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
     644   store i8 0, i8* %b1ptr
     645   %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
     646   store i8 0, i8* %b2ptr
         ; An i16 index of 1 is byte offset 2, so the i16 load below covers bytes 2
         ; and 3 while the alloca only holds bytes 0 through 2.
     647   %iptrcast = bitcast [3 x i8]* %a to i16*
     648   %iptrgep = getelementptr i16* %iptrcast, i64 1
     649   %i = load i16* %iptrgep
     650   %ret = zext i16 %i to i32
     651   ret i32 %ret
     652 }
    653 
     654 %test14.struct = type { [3 x i32] }
     655 
     656 define void @test14(...) nounwind uwtable {
     657 ; This is a strange case where we split allocas into promotable partitions, but
     658 ; also gain enough data to prove they must be dead allocas due to GEPs that walk
     659 ; across two adjacent allocas. Test that we don't try to promote or otherwise
     660 ; do bad things to these dead allocas, they should just be removed.
     661 ; CHECK-LABEL: @test14(
     662 ; CHECK-NEXT: entry:
     663 ; CHECK-NEXT: ret void
     664 
     665 entry:
     666   %a = alloca %test14.struct
     667   %p = alloca %test14.struct*
         ; Byte offset 12 equals the size of %test14.struct (three i32s), so %1 and
         ; every pointer derived from it lie past the end of %a. Each i32 loaded
         ; from %a is stored to the matching slot of that out-of-bounds copy.
     668   %0 = bitcast %test14.struct* %a to i8*
     669   %1 = getelementptr i8* %0, i64 12
     670   %2 = bitcast i8* %1 to %test14.struct*
     671   %3 = getelementptr inbounds %test14.struct* %2, i32 0, i32 0
     672   %4 = getelementptr inbounds %test14.struct* %a, i32 0, i32 0
     673   %5 = bitcast [3 x i32]* %3 to i32*
     674   %6 = bitcast [3 x i32]* %4 to i32*
     675   %7 = load i32* %6, align 4
     676   store i32 %7, i32* %5, align 4
     677   %8 = getelementptr inbounds i32* %5, i32 1
     678   %9 = getelementptr inbounds i32* %6, i32 1
     679   %10 = load i32* %9, align 4
     680   store i32 %10, i32* %8, align 4
     681   %11 = getelementptr inbounds i32* %5, i32 2
     682   %12 = getelementptr inbounds i32* %6, i32 2
     683   %13 = load i32* %12, align 4
     684   store i32 %13, i32* %11, align 4
     685   ret void
     686 }
    687 
     688 define i32 @test15(i1 %flag) nounwind uwtable {
     689 ; Ensure that when there are dead instructions using an alloca that are not
     690 ; loads or stores we still delete them during partitioning and rewriting.
     691 ; Otherwise we'll go to promote them while they still have unpromotable uses.
     692 ; CHECK-LABEL: @test15(
     693 ; CHECK-NEXT: entry:
     694 ; CHECK-NEXT:   br label %loop
     695 ; CHECK:      loop:
     696 ; CHECK-NEXT:   br label %loop
     697 
     698 entry:
     699   %l0 = alloca i64
     700   %l1 = alloca i64
     701   %l2 = alloca i64
     702   %l3 = alloca i64
     703   br label %loop
     704 
     705 loop:
         ; Each alloca below gets one store plus a different kind of otherwise unused
         ; user: a phi (%dead3), a bitcast (%dead0), a GEP (%dead1) and a select
         ; (%dead2).
     706   %dead3 = phi i8* [ %gep3, %loop ], [ null, %entry ]
     707 
     708   store i64 1879048192, i64* %l0, align 8
     709   %bc0 = bitcast i64* %l0 to i8*
     710   %gep0 = getelementptr i8* %bc0, i64 3
     711   %dead0 = bitcast i8* %gep0 to i64*
     712 
     713   store i64 1879048192, i64* %l1, align 8
     714   %bc1 = bitcast i64* %l1 to i8*
     715   %gep1 = getelementptr i8* %bc1, i64 3
     716   %dead1 = getelementptr i8* %gep1, i64 1
     717 
     718   store i64 1879048192, i64* %l2, align 8
     719   %bc2 = bitcast i64* %l2 to i8*
     720   %gep2.1 = getelementptr i8* %bc2, i64 1
     721   %gep2.2 = getelementptr i8* %bc2, i64 3
     722   ; Note that this select should get visited multiple times due to using two
     723   ; different GEPs off the same alloca. We should only delete it once.
     724   %dead2 = select i1 %flag, i8* %gep2.1, i8* %gep2.2
     725 
     726   store i64 1879048192, i64* %l3, align 8
     727   %bc3 = bitcast i64* %l3 to i8*
     728   %gep3 = getelementptr i8* %bc3, i64 3
     729 
     730   br label %loop
     731 }
    732 
     733 define void @test16(i8* %src, i8* %dst) {
     734 ; Ensure that we can promote an alloca of [3 x i8] to an i24 SSA value.
         ; Note that both memcpy calls transfer 4 bytes, one more than the alloca
         ; holds, while the expected output only moves i24 values.
     735 ; CHECK-LABEL: @test16(
     736 ; CHECK-NOT: alloca
     737 ; CHECK:      %[[srccast:.*]] = bitcast i8* %src to i24*
     738 ; CHECK-NEXT: load i24* %[[srccast]]
     739 ; CHECK-NEXT: %[[dstcast:.*]] = bitcast i8* %dst to i24*
     740 ; CHECK-NEXT: store i24 0, i24* %[[dstcast]]
     741 ; CHECK-NEXT: ret void
     742 
     743 entry:
     744   %a = alloca [3 x i8]
     745   %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
     746   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 false)
     747   %cast = bitcast i8* %ptr to i24*
     748   store i24 0, i24* %cast
     749   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 false)
     750   ret void
     751 }
    752 
     753 define void @test17(i8* %src, i8* %dst) {
     754 ; Ensure that we can rewrite unpromotable memcpys which extend past the end of
     755 ; the alloca.
         ; Both memcpy calls copy 4 bytes to or from the 3-byte alloca and pass
         ; 'i1 true' as their final (volatile) argument; the expected output keeps
         ; the alloca and both calls.
     756 ; CHECK-LABEL: @test17(
     757 ; CHECK:      %[[a:.*]] = alloca [3 x i8]
     758 ; CHECK-NEXT: %[[ptr:.*]] = getelementptr [3 x i8]* %[[a]], i32 0, i32 0
     759 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[ptr]], i8* %src,
     760 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[ptr]],
     761 ; CHECK-NEXT: ret void
     762 
     763 entry:
     764   %a = alloca [3 x i8]
     765   %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
     766   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
     767   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
     768   ret void
     769 }
    770 
     771 define void @test18(i8* %src, i8* %dst, i32 %size) {
     772 ; Preserve transfer intrinsics with a variable size, even if they overlap with
     773 ; fixed size operations. Further, continue to split and promote allocas preceding
     774 ; the variable sized intrinsic.
         ; The fixed-size operations touch bytes 0-7 of the 42-byte alloca; the
         ; %size-length memcpy/memset calls start at byte 8, which is why the
         ; expected output retains a 34-byte alloca.
     775 ; CHECK-LABEL: @test18(
     776 ; CHECK:      %[[a:.*]] = alloca [34 x i8]
     777 ; CHECK:      %[[srcgep1:.*]] = getelementptr inbounds i8* %src, i64 4
     778 ; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32*
     779 ; CHECK-NEXT: %[[srcload:.*]] = load i32* %[[srccast1]]
     780 ; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
     781 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[agep1]], i8* %src, i32 %size,
     782 ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
     783 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[agep2]], i8 42, i32 %size,
     784 ; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32*
     785 ; CHECK-NEXT: store i32 42, i32* %[[dstcast1]]
     786 ; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8* %dst, i64 4
     787 ; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32*
     788 ; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]]
     789 ; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
     790 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[agep3]], i32 %size,
     791 ; CHECK-NEXT: ret void
     792 
     793 entry:
     794   %a = alloca [42 x i8]
     795   %ptr = getelementptr [42 x i8]* %a, i32 0, i32 0
     796   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 false)
     797   %ptr2 = getelementptr [42 x i8]* %a, i32 0, i32 8
     798   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i32 1, i1 false)
     799   call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i32 1, i1 false)
     800   %cast = bitcast i8* %ptr to i32*
     801   store i32 42, i32* %cast
     802   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 false)
     803   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i32 1, i1 false)
     804   ret void
     805 }
    806 
     807 %opaque = type opaque
     808 
     809 define i32 @test19(%opaque* %x) {
     810 ; This input will cause us to try to compute a natural GEP when rewriting
     811 ; pointers in such a way that we try to GEP through the opaque type. Previously,
     812 ; a check for an unsized type was missing and this crashed. Ensure it behaves
     813 ; reasonably now.
     814 ; CHECK-LABEL: @test19(
     815 ; CHECK-NOT: alloca
     816 ; CHECK: ret i32 undef
     817 
     818 entry:
     819   %a = alloca { i64, i8* }
         ; The memcpy source is the opaque-typed argument, cast to i8*.
     820   %cast1 = bitcast %opaque* %x to i8*
     821   %cast2 = bitcast { i64, i8* }* %a to i8*
     822   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i32 1, i1 false)
     823   %gep = getelementptr inbounds { i64, i8* }* %a, i32 0, i32 0
         ; %val is never used; the function returns undef.
     824   %val = load i64* %gep
     825   ret i32 undef
     826 }
    827 
     828 define i32 @test20() {
     829 ; Ensure we can track negative offsets (before the beginning of the alloca) and
     830 ; negative relative offsets from offsets starting past the end of the alloca.
     831 ; CHECK-LABEL: @test20(
     832 ; CHECK-NOT: alloca
     833 ; CHECK: %[[sum1:.*]] = add i32 1, 2
     834 ; CHECK: %[[sum2:.*]] = add i32 %[[sum1]], 3
     835 ; CHECK: ret i32 %[[sum2]]
     836 
     837 entry:
     838   %a = alloca [3 x i32]
     839   %gep1 = getelementptr [3 x i32]* %a, i32 0, i32 0
     840   store i32 1, i32* %gep1
         ; Index -2 followed by +3 lands on element 1 of the array.
     841   %gep2.1 = getelementptr [3 x i32]* %a, i32 0, i32 -2
     842   %gep2.2 = getelementptr i32* %gep2.1, i32 3
     843   store i32 2, i32* %gep2.2
         ; Index 14 followed by -12 lands on element 2 of the array.
     844   %gep3.1 = getelementptr [3 x i32]* %a, i32 0, i32 14
     845   %gep3.2 = getelementptr i32* %gep3.1, i32 -12
     846   store i32 3, i32* %gep3.2
     847 
     848   %load1 = load i32* %gep1
     849   %load2 = load i32* %gep2.2
     850   %load3 = load i32* %gep3.2
     851   %sum1 = add i32 %load1, %load2
     852   %sum2 = add i32 %sum1, %load3
     853   ret i32 %sum2
     854 }
    855 
     856 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
     857 
     858 define i8 @test21() {
     859 ; Test allocations and offsets which border on overflow of the int64_t used
     860 ; internally. This is awkward to test thoroughly, as LLVM doesn't really
     861 ; support such extreme constructs cleanly.
     862 ; CHECK-LABEL: @test21(
     863 ; CHECK-NOT: alloca
     864 ; CHECK: or i8 -1, -1
     865 
     866 entry:
         ; The array length is 2^61 - 1 bytes.
     867   %a = alloca [2305843009213693951 x i8]
     868   %gep0 = getelementptr [2305843009213693951 x i8]* %a, i64 0, i64 2305843009213693949
     869   store i8 255, i8* %gep0
     870   %gep1 = getelementptr [2305843009213693951 x i8]* %a, i64 0, i64 -9223372036854775807
     871   %gep2 = getelementptr i8* %gep1, i64 -1
         ; The memset length is 2^64 - 1, the all-ones i64 bit pattern.
     872   call void @llvm.memset.p0i8.i64(i8* %gep2, i8 0, i64 18446744073709551615, i32 1, i1 false)
     873   %gep3 = getelementptr i8* %gep1, i64 9223372036854775807
     874   %gep4 = getelementptr i8* %gep3, i64 9223372036854775807
     875   %gep5 = getelementptr i8* %gep4, i64 -6917529027641081857
     876   store i8 255, i8* %gep5
     877   %cast1 = bitcast i8* %gep4 to i32*
     878   store i32 0, i32* %cast1
     879   %load = load i8* %gep0
     880   %gep6 = getelementptr i8* %gep0, i32 1
     881   %load2 = load i8* %gep6
     882   %result = or i8 %load, %load2
     883   ret i8 %result
     884 }
    885 
    886 %PR13916.struct = type { i8 }
    887 
    888 define void @PR13916.1() {
    889 ; Ensure that we handle overlapping memcpy intrinsics correctly, especially in
    890 ; the case where there is a directly identical value for both source and dest.
    891 ; CHECK: @PR13916.1
    892 ; CHECK-NOT: alloca
    893 ; CHECK: ret void
    894 
    895 entry:
    896   %a = alloca i8
    897   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false)
    898   %tmp2 = load i8* %a
    899   ret void
    900 }
    901 
    902 define void @PR13916.2() {
    903 ; Check whether we continue to handle them correctly when they start off with
    904 ; different pointer value chains, but during rewriting we coalesce them into the
    905 ; same value.
    906 ; CHECK: @PR13916.2
    907 ; CHECK-NOT: alloca
    908 ; CHECK: ret void
    909 
    910 entry:
    911   %a = alloca %PR13916.struct, align 1
    912   br i1 undef, label %if.then, label %if.end
    913 
    914 if.then:
    915   %tmp0 = bitcast %PR13916.struct* %a to i8*
    916   %tmp1 = bitcast %PR13916.struct* %a to i8*
    917   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i32 1, i1 false)
    918   br label %if.end
    919 
    920 if.end:
    921   %gep = getelementptr %PR13916.struct* %a, i32 0, i32 0
    922   %tmp2 = load i8* %gep
    923   ret void
    924 }
    925 
     926 define void @PR13990() {
     927 ; Ensure we can handle cases where processing one alloca causes the other
     928 ; alloca to become dead and get deleted. This might crash or fail under
     929 ; Valgrind if we regress.
     930 ; CHECK-LABEL: @PR13990(
     931 ; CHECK-NOT: alloca
     932 ; CHECK: unreachable
     933 ; CHECK: unreachable
     934 
     935 entry:
     936   %tmp1 = alloca i8*
     937   %tmp2 = alloca i8*
     938   br i1 undef, label %bb1, label %bb2
     939 
     940 bb1:
     941   store i8* undef, i8** %tmp2
     942   br i1 undef, label %bb2, label %bb3
     943 
     944 bb2:
         ; This select is the only user of %tmp1 and ties the two allocas together;
         ; its result %tmp50 is never used.
     945   %tmp50 = select i1 undef, i8** %tmp2, i8** %tmp1
     946   br i1 undef, label %bb3, label %bb4
     947 
     948 bb3:
     949   unreachable
     950 
     951 bb4:
     952   unreachable
     953 }
    954 
     955 define double @PR13969(double %x) {
     956 ; Check that we detect when promotion will un-escape an alloca and iterate to
     957 ; re-try running SROA over that alloca. Without that, the two allocas that are
     958 ; stored into a dead alloca don't get rewritten and promoted.
     959 ; CHECK-LABEL: @PR13969(
     960 
     961 entry:
     962   %a = alloca double
     963   %b = alloca double*
     964   %c = alloca double
     965 ; CHECK-NOT: alloca
     966 
         ; %b only ever has the addresses of %a and %c stored into it; nothing in
         ; this function loads from %b.
     967   store double %x, double* %a
     968   store double* %c, double** %b
     969   store double* %a, double** %b
     970   store double %x, double* %c
     971   %ret = load double* %a
     972 ; CHECK-NOT: store
     973 ; CHECK-NOT: load
     974 
     975   ret double %ret
     976 ; CHECK: ret double %x
     977 }
    978 
     979 %PR14034.struct = type { { {} }, i32, %PR14034.list }
     980 %PR14034.list = type { %PR14034.list*, %PR14034.list* }
     981 
     982 define void @PR14034() {
     983 ; This test case tries to form GEPs into the empty leading struct members, and
     984 ; subsequently crashed (under valgrind) before we fixed the PR. The important
     985 ; thing is to handle empty structs gracefully.
         ; Only the label is checked: the test passes as long as opt does not crash.
     986 ; CHECK-LABEL: @PR14034(
     987 
     988 entry:
     989   %a = alloca %PR14034.struct
     990   %list = getelementptr %PR14034.struct* %a, i32 0, i32 2
     991   %prev = getelementptr %PR14034.list* %list, i32 0, i32 1
     992   store %PR14034.list* undef, %PR14034.list** %prev
         ; The memcpy destination %cast0 is a bitcast of undef; its source is %a.
     993   %cast0 = bitcast %PR14034.struct* undef to i8*
     994   %cast1 = bitcast %PR14034.struct* %a to i8*
     995   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i32 0, i1 false)
     996   ret void
     997 }
    998 
     999 define i32 @test22(i32 %x) {
    1000 ; Test that SROA and promotion is not confused by a grab bag mixture of pointer
    1001 ; types involving wrapper aggregates and zero-length aggregate members.
    1002 ; CHECK-LABEL: @test22(
    1003 
    1004 entry:
    1005   %a1 = alloca { { [1 x { i32 }] } }
    1006   %a2 = alloca { {}, { float }, [0 x i8] }
    1007   %a3 = alloca { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }
    1008 ; CHECK-NOT: alloca
    1009 
         ; Store %x into %a1 wrapped as [1 x { i32 }], then reload it through a
         ; pointer cast as a wrapped float.
    1010   %wrap1 = insertvalue [1 x { i32 }] undef, i32 %x, 0, 0
    1011   %gep1 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0, i32 0
    1012   store [1 x { i32 }] %wrap1, [1 x { i32 }]* %gep1
    1013 
    1014   %gep2 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0
    1015   %ptrcast1 = bitcast { [1 x { i32 }] }* %gep2 to { [1 x { float }] }*
    1016   %load1 = load { [1 x { float }] }* %ptrcast1
    1017   %unwrap1 = extractvalue { [1 x { float }] } %load1, 0, 0
    1018 
         ; Pass the value through %a2, reading it back as <4 x i8>.
    1019   %wrap2 = insertvalue { {}, { float }, [0 x i8] } undef, { float } %unwrap1, 1
    1020   store { {}, { float }, [0 x i8] } %wrap2, { {}, { float }, [0 x i8] }* %a2
    1021 
    1022   %gep3 = getelementptr { {}, { float }, [0 x i8] }* %a2, i32 0, i32 1, i32 0
    1023   %ptrcast2 = bitcast float* %gep3 to <4 x i8>*
    1024   %load3 = load <4 x i8>* %ptrcast2
    1025   %valcast1 = bitcast <4 x i8> %load3 to i32
    1026 
         ; Pass the value through %a3, reading it back as a float inside a struct
         ; with empty members.
    1027   %wrap3 = insertvalue [1 x [1 x i32]] undef, i32 %valcast1, 0, 0
    1028   %wrap4 = insertvalue { [1 x [1 x i32]], {} } undef, [1 x [1 x i32]] %wrap3, 0
    1029   %gep4 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1
    1030   %ptrcast3 = bitcast { [0 x double], [1 x [1 x <4 x i8>]], {} }* %gep4 to { [1 x [1 x i32]], {} }*
    1031   store { [1 x [1 x i32]], {} } %wrap4, { [1 x [1 x i32]], {} }* %ptrcast3
    1032 
    1033   %gep5 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1, i32 1, i32 0
    1034   %ptrcast4 = bitcast [1 x <4 x i8>]* %gep5 to { {}, float, {} }*
    1035   %load4 = load { {}, float, {} }* %ptrcast4
    1036   %unwrap2 = extractvalue { {}, float, {} } %load4, 1
    1037   %valcast2 = bitcast float %unwrap2 to i32
    1038 
    1039   ret i32 %valcast2
    1040 ; CHECK: ret i32
    1041 }
   1042 
   1043 define void @PR14059.1(double* %d) {
   1044 ; In PR14059 a peculiar construct was identified as something that is used
   1045 ; pervasively in ARM's ABI-calling-convention lowering: the passing of a struct
   1046 ; of doubles via an array of i32 in order to place the data into integer
   1047 ; registers. This in turn was missed as an optimization by SROA due to the
   1048 ; partial loads and stores of integers to the double alloca we were trying to
   1049 ; form and promote. The solution is to widen the integer operations to be
   1050 ; whole-alloca operations, and perform the appropriate bitcasting on the
   1051 ; *values* rather than the pointers. When this works, partial reads and writes
   1052 ; via integers can be promoted away.
   1053 ; CHECK: @PR14059.1
   1054 ; CHECK-NOT: alloca
   1055 ; CHECK: ret void
   1056 
   1057 entry:
   1058   %X.sroa.0.i = alloca double, align 8
   1059   %0 = bitcast double* %X.sroa.0.i to i8*
   1060   call void @llvm.lifetime.start(i64 -1, i8* %0)
   1061 
   1062   ; Store to the low 32-bits...
   1063   %X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32*
   1064   store i32 0, i32* %X.sroa.0.0.cast2.i, align 8
   1065 
   1066   ; Also use a memset to the middle 32-bits for fun.
   1067   %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8* %0, i32 2
   1068   call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i32 1, i1 false)
   1069 
   1070   ; Or a memset of the whole thing.
   1071   call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
   1072 
   1073   ; Write to the high 32-bits with a memcpy.
   1074   %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
   1075   %d.raw = bitcast double* %d to i8*
   1076   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false)
   1077 
   1078   ; Store to the high 32-bits...
   1079   %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
   1080   store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
   1081 
   1082   ; Do the actual math...
   1083   %X.sroa.0.0.load1.i = load double* %X.sroa.0.i, align 8
   1084   %accum.real.i = load double* %d, align 8
   1085   %add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
   1086   store double %add.r.i, double* %d, align 8
   1087   call void @llvm.lifetime.end(i64 -1, i8* %0)
   1088   ret void
   1089 }
   1090 
   1091 define i64 @PR14059.2({ float, float }* %phi) {
   1092 ; Check that SROA can split up alloca-wide integer loads and stores where the
   1093 ; underlying alloca has smaller components that are accessed independently. This
   1094 ; shows up particularly with ABI lowering patterns coming out of Clang that rely
   1095 ; on the particular register placement of a single large integer return value.
   1096 ; CHECK: @PR14059.2
   1097 
   1098 entry:
   1099   %retval = alloca { float, float }, align 4
   1100   ; CHECK-NOT: alloca
   1101 
   1102   %0 = bitcast { float, float }* %retval to i64*
   1103   store i64 0, i64* %0
   1104   ; CHECK-NOT: store
   1105 
   1106   %phi.realp = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
   1107   %phi.real = load float* %phi.realp
   1108   %phi.imagp = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
   1109   %phi.imag = load float* %phi.imagp
   1110   ; CHECK:      %[[realp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
   1111   ; CHECK-NEXT: %[[real:.*]] = load float* %[[realp]]
   1112   ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
   1113   ; CHECK-NEXT: %[[imag:.*]] = load float* %[[imagp]]
   1114 
   1115   %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
   1116   %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
   1117   store float %phi.real, float* %real
   1118   store float %phi.imag, float* %imag
   1119   ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
   1120   ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
   1121   ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
   1122   ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
   1123   ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
   1124   ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
   1125   ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
   1126   ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
   1127   ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
   1128 
   1129   %1 = load i64* %0, align 1
   1130   ret i64 %1
   1131   ; CHECK-NEXT: ret i64 %[[real_insert]]
   1132 }
   1133 
    1133 define void @PR14105({ [16 x i8] }* %ptr) {
    1134 ; Ensure that when rewriting the GEP index '-1' for this alloca we preserve its
    1135 ; sign as negative. We use a volatile memcpy to ensure promotion never actually
    1136 ; occurs.
    1137 ; CHECK-LABEL: @PR14105(
    1138 
    1139 entry:
    1140   %a = alloca { [16 x i8] }, align 8
    1141 ; CHECK: alloca [16 x i8], align 8
    1142 
    1143   %gep = getelementptr inbounds { [16 x i8] }* %ptr, i64 -1
    1144 ; CHECK-NEXT: getelementptr inbounds { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0
    1145 
    1146   %cast1 = bitcast { [16 x i8 ] }* %gep to i8*
    1147   %cast2 = bitcast { [16 x i8 ] }* %a to i8*
    1148   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
    1149   ret void
    1150 ; CHECK: ret
    1151 }
   1153 
    1153 define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) {
    1154 ; Make sure that the right address space pointer is used for the type check.
         ; Address space 1 pointers are 16 bits wide in the target datalayout
         ; (p1:16:16:16), which is why the expected GEP below uses i16 indices.
    1155 ; CHECK-LABEL: @PR14105_as1(
    1156 
    1157 entry:
    1158   %a = alloca { [16 x i8] }, align 8
    1159 ; CHECK: alloca [16 x i8], align 8
    1160 
    1161   %gep = getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i64 -1
    1162 ; CHECK-NEXT: getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i16 0
    1163 
    1164   %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)*
    1165   %cast2 = bitcast { [16 x i8 ] }* %a to i8*
    1166   call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
    1167   ret void
    1168 ; CHECK: ret
    1169 }
   1171 
    1171 define void @PR14465() {
    1172 ; Ensure that we don't crash when analyzing an alloca larger than the maximum
    1173 ; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1).
    1174 ; CHECK-LABEL: @PR14465(
    1175 
    1176   %stack = alloca [1048576 x i32], align 16
    1177 ; CHECK: alloca [1048576 x i32]
    1178   %cast = bitcast [1048576 x i32]* %stack to i8*
         ; The memset covers the whole array: 1048576 * 4 = 4194304 bytes.
    1179   call void @llvm.memset.p0i8.i64(i8* %cast, i8 -2, i64 4194304, i32 16, i1 false)
    1180   ret void
    1181 ; CHECK: ret
    1182 }
   1184 
    1184 define void @PR14548(i1 %x) {
    1185 ; Handle a mixture of i1 and i8 loads and stores to allocas. This particular
    1186 ; pattern caused crashes and invalid output in the PR, and its nature will
    1187 ; trigger a mixture in several permutations as we resolve each alloca
    1188 ; iteratively.
    1189 ; Note that we don't do a particularly good *job* of handling these mixtures,
    1190 ; but the hope is that this is very rare.
    1191 ; CHECK-LABEL: @PR14548(
    1192 
    1193 entry:
    1194   %a = alloca <{ i1 }>, align 8
    1195   %b = alloca <{ i1 }>, align 8
    1196 ; CHECK:      %[[a:.*]] = alloca i8, align 8
    1197 
         ; %b is written as an i1 and read back as an i8.
    1198   %b.i1 = bitcast <{ i1 }>* %b to i1*
    1199   store i1 %x, i1* %b.i1, align 8
    1200   %b.i8 = bitcast <{ i1 }>* %b to i8*
    1201   %foo = load i8* %b.i8, align 1
    1202 ; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
    1203 ; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
    1204 ; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8
    1205 
         ; %a is filled from %b by a one-byte memcpy, then read as both i8 and i1.
         ; None of %foo, %bar or %baz is used afterwards.
    1206   %a.i8 = bitcast <{ i1 }>* %a to i8*
    1207   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
    1208   %bar = load i8* %a.i8, align 1
    1209   %a.i1 = getelementptr inbounds <{ i1 }>* %a, i32 0, i32 0
    1210   %baz = load i1* %a.i1, align 1
    1211 ; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1*
    1212 ; CHECK-NEXT: {{.*}} = load i1* %[[a_cast]], align 8
    1213 
    1214   ret void
    1215 }
   1217 
   1218 define <3 x i8> @PR14572.1(i32 %x) {
   1219 ; Ensure that a split integer store which is wider than the type size of the
   1220 ; alloca (relying on the alloc size padding) doesn't trigger an assert.
   1221 ; CHECK: @PR14572.1
   1222 
   1223 entry:
   1224   %a = alloca <3 x i8>, align 4
   1225 ; CHECK-NOT: alloca
   1226 
   1227   %cast = bitcast <3 x i8>* %a to i32*
   1228   store i32 %x, i32* %cast, align 1
   1229   %y = load <3 x i8>* %a, align 4
   1230   ret <3 x i8> %y
   1231 ; CHECK: ret <3 x i8>
   1232 }
   1233 
   1234 define i32 @PR14572.2(<3 x i8> %x) {
   1235 ; Ensure that a split integer load which is wider than the type size of the
   1236 ; alloca (relying on the alloc size padding) doesn't trigger an assert.
   1237 ; CHECK: @PR14572.2
   1238 
   1239 entry:
   1240   %a = alloca <3 x i8>, align 4
   1241 ; CHECK-NOT: alloca
   1242 
   1243   store <3 x i8> %x, <3 x i8>* %a, align 1
   1244   %cast = bitcast <3 x i8>* %a to i32*
   1245   %y = load i32* %cast, align 4
   1246   ret i32 %y
   1247 ; CHECK: ret i32
   1248 }
   1249 
    1249 define i32 @PR14601(i32 %x) {
    1250 ; Don't try to form a promotable integer alloca when there is a variable length
    1251 ; memory intrinsic.
    1252 ; CHECK-LABEL: @PR14601(
    1253 
    1254 entry:
    1255   %a = alloca i32
    1256 ; CHECK: alloca
    1257 
         ; The memset length is the function argument %x rather than a constant.
    1258   %a.i8 = bitcast i32* %a to i8*
    1259   call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i32 1, i1 false)
    1260   %v = load i32* %a
    1261   ret i32 %v
    1262 }
   1264 
    1264 define void @PR15674(i8* %data, i8* %src, i32 %size) {
    1265 ; Arrange (via control flow) to have unmerged stores of a particular width to
    1266 ; an alloca where we incrementally store from the end of the array toward the
    1267 ; beginning of the array. Ensure that the final integer store, despite being
    1268 ; convertible to the integer type that we end up promoting this alloca toward,
    1269 ; doesn't get widened to a full alloca store.
    1270 ; CHECK-LABEL: @PR15674(
    1271 
    1272 entry:
    1273   %tmp = alloca [4 x i8], align 1
    1274 ; CHECK: alloca i32
    1275 
         ; Each case falls through to the next lower one (bb4 -> bb3 -> bb2 -> bb1),
         ; so a %size of N copies bytes N-1 down to 0.
    1276   switch i32 %size, label %end [
    1277     i32 4, label %bb4
    1278     i32 3, label %bb3
    1279     i32 2, label %bb2
    1280     i32 1, label %bb1
    1281   ]
    1282 
    1283 bb4:
    1284   %src.gep3 = getelementptr inbounds i8* %src, i32 3
    1285   %src.3 = load i8* %src.gep3
    1286   %tmp.gep3 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 3
    1287   store i8 %src.3, i8* %tmp.gep3
    1288 ; CHECK: store i8
    1289 
    1290   br label %bb3
    1291 
    1292 bb3:
    1293   %src.gep2 = getelementptr inbounds i8* %src, i32 2
    1294   %src.2 = load i8* %src.gep2
    1295   %tmp.gep2 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 2
    1296   store i8 %src.2, i8* %tmp.gep2
    1297 ; CHECK: store i8
    1298 
    1299   br label %bb2
    1300 
    1301 bb2:
    1302   %src.gep1 = getelementptr inbounds i8* %src, i32 1
    1303   %src.1 = load i8* %src.gep1
    1304   %tmp.gep1 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 1
    1305   store i8 %src.1, i8* %tmp.gep1
    1306 ; CHECK: store i8
    1307 
    1308   br label %bb1
    1309 
    1310 bb1:
    1311   %src.gep0 = getelementptr inbounds i8* %src, i32 0
    1312   %src.0 = load i8* %src.gep0
    1313   %tmp.gep0 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 0
    1314   store i8 %src.0, i8* %tmp.gep0
    1315 ; CHECK: store i8
    1316 
    1317   br label %end
    1318 
    1319 end:
    1320   %tmp.raw = bitcast [4 x i8]* %tmp to i8*
    1321   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i32 1, i1 false)
    1322   ret void
    1323 ; CHECK: ret void
    1324 }
   1326 
   1327 define void @PR15805(i1 %a, i1 %b) {
        ; Loads from an i64 alloca through two chained selects. Both selects have
        ; undef conditions, and every pointer operand is either %c itself or the
        ; other select of %c. The loaded value %cond is never used, and the
        ; arguments %a and %b are not referenced. The expectations below require
        ; that no alloca remains in the output.
   1328 ; CHECK-LABEL: @PR15805(
   1329 ; CHECK-NOT: alloca
   1330 ; CHECK: ret void
   1331 
   1332   %c = alloca i64, align 8
   1333   %p.0.c = select i1 undef, i64* %c, i64* %c
   1334   %cond.in = select i1 undef, i64* %p.0.c, i64* %c
   1335   %cond = load i64* %cond.in, align 8
   1336   ret void
   1337 }
   1338 
   1339 define void @PR15805.1(i1 %a, i1 %b) {
   1340 ; Same as the normal PR15805, but rigged to place the use before the def inside
   1341 ; of looping unreachable code. This helps ensure that we aren't sensitive to the
   1342 ; order in which the uses of the alloca are visited.
   1343 ;
   1344 ; CHECK-LABEL: @PR15805.1(
   1345 ; CHECK-NOT: alloca
   1346 ; CHECK: ret void
   1347 
   1348   %c = alloca i64, align 8
        ; The entry block jumps straight to %exit, so %loop is only reachable
        ; from its own back edge, i.e. never from the entry block.
   1349   br label %exit
   1350 
   1351 loop:
        ; %cond.in refers to %p.0.c one line before %p.0.c is defined.
   1352   %cond.in = select i1 undef, i64* %c, i64* %p.0.c
   1353   %p.0.c = select i1 undef, i64* %c, i64* %c
   1354   %cond = load i64* %cond.in, align 8
   1355   br i1 undef, label %loop, label %exit
   1356 
   1357 exit:
   1358   ret void
   1359 }
   1360 
   1361 define void @PR16651.1(i8* %a) {
   1362 ; This test case caused a crash due to the volatile memcpy in combination with
   1363 ; lowering to integer loads and stores of a width other than that of the original
   1364 ; memcpy.
   1365 ;
        ; The expectations below require three allocas in the output (an i16 and
        ; two i8) in place of the single i32 alloca %b, followed by the
        ; unreachable terminator.
        ;
   1366 ; CHECK-LABEL: @PR16651.1(
   1367 ; CHECK: alloca i16
   1368 ; CHECK: alloca i8
   1369 ; CHECK: alloca i8
   1370 ; CHECK: unreachable
   1371 
   1372 entry:
   1373   %b = alloca i32, align 4
   1374   %b.cast = bitcast i32* %b to i8*
        ; 4-byte memcpy from %a into %b; the trailing `i1 true` is the flag the
        ; comment above calls volatile.
   1375   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b.cast, i8* %a, i32 4, i32 4, i1 true)
        ; Single-byte load at byte offset 2 of %b; the unnamed result is unused.
   1376   %b.gep = getelementptr inbounds i8* %b.cast, i32 2
   1377   load i8* %b.gep, align 2
   1378   unreachable
   1379 }
   1380 
   1381 define void @PR16651.2() {
   1382 ; This test case caused a crash due to failing to promote given a select that
   1383 ; can't be speculated. It shouldn't be promoted, but we missed that fact when
   1384 ; analyzing whether we could form a vector promotion because that code didn't
   1385 ; bail on select instructions.
   1386 ;
        ; The expectations below require that an `alloca <2 x float>` is still
        ; present in the output.
        ;
   1387 ; CHECK-LABEL: @PR16651.2(
   1388 ; CHECK: alloca <2 x float>
   1389 ; CHECK: ret void
   1390 
   1391 entry:
   1392   %tv1 = alloca { <2 x float>, <2 x float> }, align 8
        ; Whole-vector store of undef into the second struct field.
   1393   %0 = getelementptr { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1
   1394   store <2 x float> undef, <2 x float>* %0, align 8
        ; Pointer to element 0 of that same second field.
   1395   %1 = getelementptr inbounds { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1, i64 0
        ; Select between null and the element pointer on an undef condition, then
        ; load a float through the result; the loaded value is unused.
   1396   %cond105.in.i.i = select i1 undef, float* null, float* %1
   1397   %cond105.i.i = load float* %cond105.in.i.i, align 8
   1398   ret void
   1399 }
   1400 
   1401 define void @test23(i32 %x) {
        ; Stores %x into an i32 alloca and then issues a 4-byte memcpy whose source
        ; is the alloca itself (%gep0, index 0) and whose destination is %gep1,
        ; which is index 1 from %a, i.e. one i32 past the single-element alloca.
        ; The expectations below require that no alloca remains in the output.
        ; NOTE(review): presumably exercises handling of a memcpy operand that
        ; lies outside the alloca; confirm against the originating commit.
   1402 ; CHECK-LABEL: @test23(
   1403 ; CHECK-NOT: alloca
   1404 ; CHECK: ret void
   1405 entry:
   1406   %a = alloca i32, align 4
   1407   store i32 %x, i32* %a, align 4
   1408   %gep1 = getelementptr inbounds i32* %a, i32 1
   1409   %gep0 = getelementptr inbounds i32* %a, i32 0
   1410   %cast1 = bitcast i32* %gep1 to i8*
   1411   %cast0 = bitcast i32* %gep0 to i8*
   1412   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i32 1, i1 false)
   1413   ret void
   1414 }
   1415 
   1416 define void @PR18615() {
        ; Uses a one-byte alloca only through a GEP with index -1, i.e. a pointer
        ; one byte before the start of %f, as the source of a 1-byte memcpy to an
        ; undef destination. The expectations below require that no alloca remains
        ; in the output.
        ; NOTE(review): presumably guards against mishandling a negative offset
        ; into the alloca; confirm against the referenced PR.
   1417 ; CHECK-LABEL: @PR18615(
   1418 ; CHECK-NOT: alloca
   1419 ; CHECK: ret void
   1420 entry:
   1421   %f = alloca i8
   1422   %gep = getelementptr i8* %f, i64 -1
   1423   call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %gep, i32 1, i32 1, i1 false)
   1424   ret void
   1425 }
   1426 
   1427 define void @test24(i8* %src, i8* %dst) {
        ; Copies 8 bytes from %src into a 16-byte-aligned i64 alloca and then 8
        ; bytes from that alloca to %dst, using two memcpy calls that both pass
        ; `i32 1` and `i1 true` as their last two operands.
        ; The expectations below require that the alloca is kept with align 16 and
        ; that the output contains, in order, a volatile i64 load (align 1), a
        ; volatile i64 store (align 16), a volatile i64 load (align 16) and a
        ; volatile i64 store (align 1).
        ; NOTE(review): the align-16 accesses presumably target the alloca and the
        ; align-1 accesses %src/%dst; the patterns use wildcards for the pointer
        ; names, so this is not pinned by the test itself.
   1428 ; CHECK-LABEL: @test24(
   1429 ; CHECK: alloca i64, align 16
   1430 ; CHECK: load volatile i64* %{{[^,]*}}, align 1
   1431 ; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 16
   1432 ; CHECK: load volatile i64* %{{[^,]*}}, align 16
   1433 ; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 1
   1434 
   1435 entry:
   1436   %a = alloca i64, align 16
   1437   %ptr = bitcast i64* %a to i8*
   1438   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 true)
   1439   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 true)
   1440   ret void
   1441 }
   1442 
   1443