; RUN: opt < %s -scalarrepl -S | FileCheck %s
;
; Exercises scalarrepl on allocas that are accessed through getelementptrs
; whose final index into a vector is a run-time value (%idx1 / %idx2).

target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "x86_64-apple-darwin10.0.0"

; CHECK: @test1
; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
; CHECK: memset
; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2

; Split the array but don't replace the memset with an insert
; element as it's not a constant offset.
; The load, however, can be replaced with an extract element.
define float @test1(i32 %idx1, i32 %idx2) {
entry:
  %0 = alloca [4 x <4 x float>]
  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
  ; Dynamic index into element 0 of the array, then memset 4 bytes there.
  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
  %cast = bitcast float* %ptr1 to i8*
  call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)
  ; Dynamic index into element 1 of the array, read back as a float.
  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: @test2
; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2

; Do SROA on the array when it has dynamic vector reads and writes.
define float @test2(i32 %idx1, i32 %idx2) {
entry:
  %0 = alloca [4 x <4 x float>]
  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
  ; Both the store and the load address the same vector (array element 0).
  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: @test3
; CHECK: %0 = alloca [4 x <4 x float>]
; CHECK-NOT: alloca

; Don't do SROA on a dynamically indexed vector when it spans
; more than one array element of the alloca array it is within.
define float @test3(i32 %idx1, i32 %idx2) {
entry:
  %0 = alloca [4 x <4 x float>]
  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
  ; View the whole 4 x <4 x float> array as one <16 x float> vector.
  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: @test4
; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <16 x float> %0, i32 %idx2

; Don't do SROA on a dynamically indexed vector when it spans
; more than one array element of the alloca array it is within.
; However, unlike test3, the store is on the vector type
; so SROA will convert the large alloca into the large vector
; type and do all accesses with insert/extract element
define float @test4(i32 %idx1, i32 %idx2) {
entry:
  %0 = alloca [4 x <4 x float>]
  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
  ; The initializing store goes through the <16 x float> view, not the array.
  store <16 x float> zeroinitializer, <16 x float>* %bigvec
  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: @test5
; CHECK: %0 = alloca [4 x <4 x float>]
; CHECK-NOT: alloca

; Don't do SROA as there is a second dynamically indexed array
; which may span multiple elements of the alloca.
define float @test5(i32 %idx1, i32 %idx2) {
entry:
  %0 = alloca [4 x <4 x float>]
  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
  ; Reinterpret the dynamically indexed float as [1 x <2 x float>] and index
  ; it dynamically a second time.
  %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*
  ; NOTE(review): %ptr3 has no users; the store below goes through %ptr1.
  ; Presumably the mere presence of this GEP is what the test relies on --
  ; confirm before changing.
  %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
  %ret = load float* %ptr4
  ret float %ret
}

; CHECK: @test6
; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2

%vector.pair = type { %vector.anon, %vector.anon }
%vector.anon = type { %vector }
%vector = type { <4 x float> }

; Dynamic GEPs on vectors were crashing when the vector was inside a struct
; as the new GEP for the new alloca might not include all the indices from
; the original GEP, just the indices it needs to get to the correct offset of
; some type, not necessarily the dynamic vector.
; This test makes sure we don't have this crash.
define float @test6(i32 %idx1, i32 %idx2) {
entry:
  %0 = alloca %vector.pair
  store %vector.pair zeroinitializer, %vector.pair* %0
  ; Store into the vector nested in field 0 of the pair ...
  %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  ; ... and load from the vector nested in field 1.
  %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: @test7
; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2

%array.pair = type { [2 x %array.anon], %array.anon }
%array.anon = type { [2 x %vector] }

; This is the same as test6 and tests the same crash, but on arrays.
define float @test7(i32 %idx1, i32 %idx2) {
entry:
  %0 = alloca %array.pair
  store %array.pair zeroinitializer, %array.pair* %0
  ; Store into the vector reached through field 0 (the [2 x %array.anon]) ...
  %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  ; ... and load from the vector reached through field 1 (the lone %array.anon).
  %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: @test8
; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]

; Do SROA on the vector when it has dynamic vector reads and writes
; from a non-zero offset.
define float @test8(i32 %idx1, i32 %idx2) {
entry:
  %0 = alloca <4 x float>
  store <4 x float> zeroinitializer, <4 x float>* %0
  ; Start at element 1, view the remainder as <3 x float>, index dynamically.
  %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
  %ptr2 = bitcast float* %ptr1 to <3 x float>*
  %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
  store float 1.0, float* %ptr3
  ; Start at element 2, view the remainder as <2 x float>, index dynamically.
  %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
  %ptr5 = bitcast float* %ptr4 to <2 x float>*
  %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
  %ret = load float* %ptr6
  ret float %ret
}

declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)