Home | History | Annotate | Download | only in MemCpyOpt
      1 ; RUN: opt < %s -memcpyopt -S | FileCheck %s
      2 
      3 ; Adjacent stores (and memsets) that write the same byte value should be merged into a single memset covering the whole region.
      4 
      5 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
      6 target triple = "i386-apple-darwin8"
      7 
      8 define void @test1(i8 signext  %c) nounwind  {
        ; Nineteen one-byte stores write the same runtime value %c to indices
        ; 0 through 18 of the 19-byte stack array %x, in ascending order, before
        ; %x is passed to @bar. The directives after the ret require a memset
        ; call in the output and no store on either side of it.
      9 entry:
      10 	%x = alloca [19 x i8]		; <[19 x i8]*> [#uses=20]
      11 	%tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0		; <i8*> [#uses=1]
      12 	store i8 %c, i8* %tmp, align 1
      13 	%tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1		; <i8*> [#uses=1]
      14 	store i8 %c, i8* %tmp5, align 1
      15 	%tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2		; <i8*> [#uses=1]
      16 	store i8 %c, i8* %tmp9, align 1
      17 	%tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3		; <i8*> [#uses=1]
      18 	store i8 %c, i8* %tmp13, align 1
      19 	%tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4		; <i8*> [#uses=1]
      20 	store i8 %c, i8* %tmp17, align 1
      21 	%tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5		; <i8*> [#uses=1]
      22 	store i8 %c, i8* %tmp21, align 1
      23 	%tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6		; <i8*> [#uses=1]
      24 	store i8 %c, i8* %tmp25, align 1
      25 	%tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7		; <i8*> [#uses=1]
      26 	store i8 %c, i8* %tmp29, align 1
      27 	%tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8		; <i8*> [#uses=1]
      28 	store i8 %c, i8* %tmp33, align 1
      29 	%tmp37 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 9		; <i8*> [#uses=1]
      30 	store i8 %c, i8* %tmp37, align 1
      31 	%tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10		; <i8*> [#uses=1]
      32 	store i8 %c, i8* %tmp41, align 1
      33 	%tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11		; <i8*> [#uses=1]
      34 	store i8 %c, i8* %tmp45, align 1
      35 	%tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12		; <i8*> [#uses=1]
      36 	store i8 %c, i8* %tmp49, align 1
      37 	%tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13		; <i8*> [#uses=1]
      38 	store i8 %c, i8* %tmp53, align 1
      39 	%tmp57 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14		; <i8*> [#uses=1]
      40 	store i8 %c, i8* %tmp57, align 1
      41 	%tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15		; <i8*> [#uses=1]
      42 	store i8 %c, i8* %tmp61, align 1
      43 	%tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 16		; <i8*> [#uses=1]
      44 	store i8 %c, i8* %tmp65, align 1
      45 	%tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17		; <i8*> [#uses=1]
      46 	store i8 %c, i8* %tmp69, align 1
      47 	%tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18		; <i8*> [#uses=1]
      48 	store i8 %c, i8* %tmp73, align 1
      49 	%tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind
      50 	ret void
      51 ; CHECK-LABEL: @test1(
      52 ; CHECK-NOT: store
      53 ; CHECK: call void @llvm.memset.p0i8.i64
      54 ; CHECK-NOT: store
      55 ; CHECK: ret
      56 }
     57 
     58 declare i32 @bar(...)
     59 
     60 %struct.MV = type { i16, i16 }
     61 
     62 
      63 define void @test2() nounwind  {
         ; Three stack arrays are fully initialised by scalar stores, each array
         ; written from its highest element (7) down to element 0:
         ;   %ref_idx  - 8 x i8, every byte set to -1
         ;   %up_mvd   - 8 x %struct.MV (two i16 fields each, 32 bytes), all 0
         ;   %left_mvd - same layout and same stores as %up_mvd
         ; The directives after the ret expect one memset per array (8 bytes of
         ; -1, then two of 32 zero bytes) with no store left between them.
         ; NOTE(review): the expected alignment of 8 on the two 32-byte memsets
         ; presumably comes from the "align 8" stores to element 0, field 0 of
         ; each MV array - confirm against the pass.
      64 entry:
      65 	%ref_idx = alloca [8 x i8]		; <[8 x i8]*> [#uses=8]
      66 	%left_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
      67 	%up_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
      68 	%tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7		; <i8*> [#uses=1]
      69 	store i8 -1, i8* %tmp20, align 1
      70 	%tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6		; <i8*> [#uses=1]
      71 	store i8 -1, i8* %tmp23, align 1
      72 	%tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5		; <i8*> [#uses=1]
      73 	store i8 -1, i8* %tmp26, align 1
      74 	%tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4		; <i8*> [#uses=1]
      75 	store i8 -1, i8* %tmp29, align 1
      76 	%tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3		; <i8*> [#uses=1]
      77 	store i8 -1, i8* %tmp32, align 1
      78 	%tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2		; <i8*> [#uses=1]
      79 	store i8 -1, i8* %tmp35, align 1
      80 	%tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1		; <i8*> [#uses=1]
      81 	store i8 -1, i8* %tmp38, align 1
      82 	%tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0		; <i8*> [#uses=2]
      83 	store i8 -1, i8* %tmp41, align 1
      84 	%tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
      85 	store i16 0, i16* %tmp43, align 2
      86 	%tmp46 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
      87 	store i16 0, i16* %tmp46, align 2
      88 	%tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
      89 	store i16 0, i16* %tmp57, align 2
      90 	%tmp60 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
      91 	store i16 0, i16* %tmp60, align 2
      92 	%tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
      93 	store i16 0, i16* %tmp71, align 2
      94 	%tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
      95 	store i16 0, i16* %tmp74, align 2
      96 	%tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
      97 	store i16 0, i16* %tmp85, align 2
      98 	%tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
      99 	store i16 0, i16* %tmp88, align 2
     100 	%tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
     101 	store i16 0, i16* %tmp99, align 2
     102 	%tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
     103 	store i16 0, i16* %tmp102, align 2
     104 	%tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
     105 	store i16 0, i16* %tmp113, align 2
     106 	%tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
     107 	store i16 0, i16* %tmp116, align 2
     108 	%tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
     109 	store i16 0, i16* %tmp127, align 2
     110 	%tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
     111 	store i16 0, i16* %tmp130, align 2
     112 	%tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
     113 	store i16 0, i16* %tmp141, align 8
     114 	%tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
     115 	store i16 0, i16* %tmp144, align 2
     116 	%tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
     117 	store i16 0, i16* %tmp148, align 2
     118 	%tmp151 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
     119 	store i16 0, i16* %tmp151, align 2
     120 	%tmp162 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
     121 	store i16 0, i16* %tmp162, align 2
     122 	%tmp165 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
     123 	store i16 0, i16* %tmp165, align 2
     124 	%tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
     125 	store i16 0, i16* %tmp176, align 2
     126 	%tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
     127 	store i16 0, i16* %tmp179, align 2
     128 	%tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
     129 	store i16 0, i16* %tmp190, align 2
     130 	%tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
     131 	store i16 0, i16* %tmp193, align 2
     132 	%tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
     133 	store i16 0, i16* %tmp204, align 2
     134 	%tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
     135 	store i16 0, i16* %tmp207, align 2
     136 	%tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
     137 	store i16 0, i16* %tmp218, align 2
     138 	%tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
     139 	store i16 0, i16* %tmp221, align 2
     140 	%tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
     141 	store i16 0, i16* %tmp232, align 2
     142 	%tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
     143 	store i16 0, i16* %tmp235, align 2
     144 	%tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
     145 	store i16 0, i16* %tmp246, align 8
     146 	%tmp249 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
     147 	store i16 0, i16* %tmp249, align 2
     148 	%up_mvd252 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
     149 	%left_mvd253 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
     150 	call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind 
     151 	ret void
     152         
     153 ; CHECK-LABEL: @test2(
     154 ; CHECK-NOT: store
     155 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %tmp41, i8 -1, i64 8, i32 1, i1 false)
     156 ; CHECK-NOT: store
     157 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 32, i32 8, i1 false)
     158 ; CHECK-NOT: store
     159 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 32, i32 8, i1 false)
     160 ; CHECK-NOT: store
     161 ; CHECK: ret
     162 }
    163 
    164 declare void @foo(%struct.MV*, %struct.MV*, i8*)
    165 
    166 
     167 ; Store followed by memset.
         ; The i32 store zeroes bytes 4..7 of %P (element 1) and the memset
         ; zeroes the next 11 bytes starting at byte 8 (element 2), so the two
         ; writes are contiguous. Expected result: a single 15-byte memset that
         ; carries the store's alignment of 4, with no store left over.
     168 define void @test3(i32* nocapture %P) nounwind ssp {
     169 entry:
     170   %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
     171   store i32 0, i32* %arrayidx, align 4
     172   %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
     173   %0 = bitcast i32* %add.ptr to i8*
     174   tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
     175   ret void
     176 ; CHECK-LABEL: @test3(
     177 ; CHECK-NOT: store
     178 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
     179 }
    180 
     181 ; Store followed by memset, different offset scenario.
         ; Here the i32 store targets %P itself (bytes 0..3) rather than a GEP
         ; result, and the 11-byte memset starts at element 1 (byte 4). The
         ; expected output is again one 15-byte memset with alignment 4.
     182 define void @test4(i32* nocapture %P) nounwind ssp {
     183 entry:
     184   store i32 0, i32* %P, align 4
     185   %add.ptr = getelementptr inbounds i32, i32* %P, i64 1
     186   %0 = bitcast i32* %add.ptr to i8*
     187   tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
     188   ret void
     189 ; CHECK-LABEL: @test4(
     190 ; CHECK-NOT: store
     191 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
     192 }
    193 
    194 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
    195 
     196 ; Memset followed by store.
         ; Same two writes as @test3 in the opposite order: the 11-byte memset
         ; at element 2 (byte 8) comes first, then the i32 store to element 1
         ; (bytes 4..7). The expected output is the same single 15-byte memset
         ; with alignment 4.
     197 define void @test5(i32* nocapture %P) nounwind ssp {
     198 entry:
     199   %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
     200   %0 = bitcast i32* %add.ptr to i8*
     201   tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
     202   %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
     203   store i32 0, i32* %arrayidx, align 4
     204   ret void
     205 ; CHECK-LABEL: @test5(
     206 ; CHECK-NOT: store
     207 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
     208 }
    209 
     210 ;; Memset followed by memset.
         ;; The first call zeroes 12 bytes at %P; the second zeroes 12 bytes
         ;; starting at element 3, i.e. byte 12, directly after the first
         ;; region. Expected result: one 24-byte memset with alignment 1.
     211 define void @test6(i32* nocapture %P) nounwind ssp {
     212 entry:
     213   %0 = bitcast i32* %P to i8*
     214   tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false)
     215   %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
     216   %1 = bitcast i32* %add.ptr to i8*
     217   tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false)
     218   ret void
     219 ; CHECK-LABEL: @test6(
     220 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
     221 }
    222 
     223 ; More aggressive heuristic
     224 ; rdar://9892684
         ; Five i32 stores of -1 to consecutive elements 0..4 of %c (20 bytes in
         ; total) in a function marked optsize. The expected output is a single
         ; 20-byte memset of the byte value -1 with alignment 4.
     225 define void @test7(i32* nocapture %c) nounwind optsize {
     226   store i32 -1, i32* %c, align 4
     227   %1 = getelementptr inbounds i32, i32* %c, i32 1
     228   store i32 -1, i32* %1, align 4
     229   %2 = getelementptr inbounds i32, i32* %c, i32 2
     230   store i32 -1, i32* %2, align 4
     231   %3 = getelementptr inbounds i32, i32* %c, i32 3
     232   store i32 -1, i32* %3, align 4
     233   %4 = getelementptr inbounds i32, i32* %c, i32 4
     234   store i32 -1, i32* %4, align 4
     235 ; CHECK-LABEL: @test7(
     236 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false)
     237   ret void
     238 }
    239 
    240 %struct.test8 = type { [4 x i32] }
    241 
     242 define void @test8() {
         ; Negative test: a lone <4 x i32> store of all -1 lanes into a 16-byte
         ; alloca. The directive after the ret expects this vector store to
         ; appear unchanged in the output, i.e. it is not turned into a memset.
     243 entry:
     244   %memtmp = alloca %struct.test8, align 16
     245   %0 = bitcast %struct.test8* %memtmp to <4 x i32>*
     246   store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
     247   ret void
     248 ; CHECK-LABEL: @test8(
     249 ; CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
     250 }
    251 
    252 @test9buf = internal unnamed_addr global [16 x i64] zeroinitializer, align 16
    253 
     254 define void @test9() nounwind {
         ; Sixteen i8 stores of -1 to bytes 0..15 of the global @test9buf. Every
         ; address is a constant expression rather than an instruction; byte 8
         ; is spelled as a bitcast of the GEP to i64 element 1 instead of an i8
         ; GEP. Each store carries its own alignment (16, 1, 2, 1, 4, ...).
         ; Expected result: one 16-byte memset on the buffer start with
         ; alignment 16.
     255   store i8 -1, i8* bitcast ([16 x i64]* @test9buf to i8*), align 16
     256   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 1), align 1
     257   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 2), align 2
     258   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 3), align 1
     259   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 4), align 4
     260   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 5), align 1
     261   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 6), align 2
     262   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 7), align 1
     263   store i8 -1, i8* bitcast (i64* getelementptr inbounds ([16 x i64], [16 x i64]* @test9buf, i64 0, i64 1) to i8*), align 8
     264   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 9), align 1
     265   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 10), align 2
     266   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 11), align 1
     267   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 12), align 4
     268   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 13), align 1
     269   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 14), align 2
     270   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1
     271   ret void
     272 ; CHECK-LABEL: @test9(
     273 ; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false)
     274 }
    275 
     276 ; PR19092
         ; Two zeroing memsets on the same pointer %P, 42 bytes and then 23
         ; bytes, so the second region lies entirely inside the first. The
         ; expected output contains exactly one memset, of 42 bytes.
     277 define void @test10(i8* nocapture %P) nounwind {
     278   tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
     279   tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i32 1, i1 false)
     280   ret void
     281 ; CHECK-LABEL: @test10(
     282 ; CHECK-NOT: memset
     283 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
     284 ; CHECK-NOT: memset
     285 ; CHECK: ret void
     286 }
    287 
     288 ; Memset followed by odd store.
         ; The memset writes 11 bytes of value 1 starting at element 3 of %P
         ; (byte 12). The store then writes a 12-byte i96 to bytes 0..11; its
         ; constant 310698676526526814092329217 is 0x010101010101010101010101,
         ; i.e. twelve bytes of value 1. Expected result: one 23-byte memset of
         ; value 1 with the store's alignment of 4, and no store left over.
     289 define void @test11(i32* nocapture %P) nounwind ssp {
     290 entry:
     291   %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
     292   %0 = bitcast i32* %add.ptr to i8*
     293   tail call void @llvm.memset.p0i8.i64(i8* %0, i8 1, i64 11, i32 1, i1 false)
     294   %arrayidx = getelementptr inbounds i32, i32* %P, i64 0
     295   %arrayidx.cast = bitcast i32* %arrayidx to i96*
     296   store i96 310698676526526814092329217, i96* %arrayidx.cast, align 4
     297   ret void
     298 ; CHECK-LABEL: @test11(
     299 ; CHECK-NOT: store
     300 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 1, i64 23, i32 4, i1 false)
     301 }
    302