1 ; RUN: opt -loop-accesses -analyze < %s | FileCheck %s 2 3 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" 4 target triple = "aarch64--linux-gnueabi" 5 6 ; 3 reads and 3 writes should need 12 memchecks 7 ; CHECK: function 'testf': 8 ; CHECK: Memory dependences are safe with run-time checks 9 10 ; Memory dependencies have labels starting from 0, so in 11 ; order to verify that we have n checks, we look for 12 ; (n-1): and not n:. 13 14 ; CHECK: Run-time memory checks: 15 ; CHECK-NEXT: Check 0: 16 ; CHECK: Check 11: 17 ; CHECK-NOT: Check 12: 18 19 define void @testf(i16* %a, 20 i16* %b, 21 i16* %c, 22 i16* %d, 23 i16* %e, 24 i16* %f) { 25 entry: 26 br label %for.body 27 28 for.body: ; preds = %for.body, %entry 29 %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] 30 31 %add = add nuw nsw i64 %ind, 1 32 33 %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind 34 %loadA = load i16, i16* %arrayidxA, align 2 35 36 %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind 37 %loadB = load i16, i16* %arrayidxB, align 2 38 39 %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind 40 %loadC = load i16, i16* %arrayidxC, align 2 41 42 %mul = mul i16 %loadB, %loadA 43 %mul1 = mul i16 %mul, %loadC 44 45 %arrayidxD = getelementptr inbounds i16, i16* %d, i64 %ind 46 store i16 %mul1, i16* %arrayidxD, align 2 47 48 %arrayidxE = getelementptr inbounds i16, i16* %e, i64 %ind 49 store i16 %mul, i16* %arrayidxE, align 2 50 51 %arrayidxF = getelementptr inbounds i16, i16* %f, i64 %ind 52 store i16 %mul1, i16* %arrayidxF, align 2 53 54 %exitcond = icmp eq i64 %add, 20 55 br i1 %exitcond, label %for.end, label %for.body 56 57 for.end: ; preds = %for.body 58 ret void 59 } 60 61 ; The following (testg and testh) check that we can group 62 ; memory checks of accesses which differ by a constant value. 63 ; Both tests are based on the following C code: 64 ; 65 ; void testh(short *a, short *b, short *c) { 66 ; unsigned long ind = 0; 67 ; for (unsigned long ind = 0; ind < 20; ++ind) { 68 ; c[2 * ind] = a[ind] * a[ind + 1]; 69 ; c[2 * ind + 1] = a[ind] * a[ind + 1] * b[ind]; 70 ; } 71 ; } 72 ; 73 ; It is sufficient to check the intervals 74 ; [a, a + 21], [b, b + 20] against [c, c + 41]. 75 76 ; 3 reads and 2 writes - two of the reads can be merged, 77 ; and the writes can be merged as well. This gives us a 78 ; total of 2 memory checks. 79 80 ; CHECK: function 'testg': 81 82 ; CHECK: Run-time memory checks: 83 ; CHECK-NEXT: Check 0: 84 ; CHECK-NEXT: Comparing group ([[ZERO:.+]]): 85 ; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc 86 ; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind 87 ; CHECK-NEXT: Against group ([[ONE:.+]]): 88 ; CHECK-NEXT: %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add 89 ; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind 90 ; CHECK-NEXT: Check 1: 91 ; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]): 92 ; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc 93 ; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind 94 ; CHECK-NEXT: Against group ([[TWO:.+]]): 95 ; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind 96 ; CHECK-NEXT: Grouped accesses: 97 ; CHECK-NEXT: Group {{.*}}[[ZERO]]: 98 ; CHECK-NEXT: (Low: %c High: (78 + %c)) 99 ; CHECK-NEXT: Member: {(2 + %c),+,4} 100 ; CHECK-NEXT: Member: {%c,+,4} 101 ; CHECK-NEXT: Group {{.*}}[[ONE]]: 102 ; CHECK-NEXT: (Low: %a High: (40 + %a)) 103 ; CHECK-NEXT: Member: {(2 + %a),+,2} 104 ; CHECK-NEXT: Member: {%a,+,2} 105 ; CHECK-NEXT: Group {{.*}}[[TWO]]: 106 ; CHECK-NEXT: (Low: %b High: (38 + %b)) 107 ; CHECK-NEXT: Member: {%b,+,2} 108 109 define void @testg(i16* %a, 110 i16* %b, 111 i16* %c) { 112 entry: 113 br label %for.body 114 115 for.body: ; preds = %for.body, %entry 116 %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] 117 %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ] 118 119 %add = add nuw nsw i64 %ind, 1 120 %store_ind_inc = add nuw nsw i64 %store_ind, 1 121 %store_ind_next = add nuw nsw i64 %store_ind_inc, 1 122 123 %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind 124 %loadA = load i16, i16* %arrayidxA, align 2 125 126 %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add 127 %loadA1 = load i16, i16* %arrayidxA1, align 2 128 129 %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind 130 %loadB = load i16, i16* %arrayidxB, align 2 131 132 %mul = mul i16 %loadA, %loadA1 133 %mul1 = mul i16 %mul, %loadB 134 135 %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind 136 store i16 %mul1, i16* %arrayidxC, align 2 137 138 %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc 139 store i16 %mul, i16* %arrayidxC1, align 2 140 141 %exitcond = icmp eq i64 %add, 20 142 br i1 %exitcond, label %for.end, label %for.body 143 144 for.end: ; preds = %for.body 145 ret void 146 } 147 148 ; 3 reads and 2 writes - the writes can be merged into a single 149 ; group, but the GEPs used for the reads are not marked as inbounds. 150 ; We can still merge them because we are using a unit stride for 151 ; accesses, so we cannot overflow the GEPs. 152 153 ; CHECK: function 'testh': 154 ; CHECK: Run-time memory checks: 155 ; CHECK-NEXT: Check 0: 156 ; CHECK-NEXT: Comparing group ([[ZERO:.+]]): 157 ; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc 158 ; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind 159 ; CHECK-NEXT: Against group ([[ONE:.+]]): 160 ; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %add 161 ; CHECK-NEXT: %arrayidxA = getelementptr i16, i16* %a, i64 %ind 162 ; CHECK-NEXT: Check 1: 163 ; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]): 164 ; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc 165 ; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind 166 ; CHECK-NEXT: Against group ([[TWO:.+]]): 167 ; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind 168 ; CHECK-NEXT: Grouped accesses: 169 ; CHECK-NEXT: Group {{.*}}[[ZERO]]: 170 ; CHECK-NEXT: (Low: %c High: (78 + %c)) 171 ; CHECK-NEXT: Member: {(2 + %c),+,4} 172 ; CHECK-NEXT: Member: {%c,+,4} 173 ; CHECK-NEXT: Group {{.*}}[[ONE]]: 174 ; CHECK-NEXT: (Low: %a High: (40 + %a)) 175 ; CHECK-NEXT: Member: {(2 + %a),+,2} 176 ; CHECK-NEXT: Member: {%a,+,2} 177 ; CHECK-NEXT: Group {{.*}}[[TWO]]: 178 ; CHECK-NEXT: (Low: %b High: (38 + %b)) 179 ; CHECK-NEXT: Member: {%b,+,2} 180 181 define void @testh(i16* %a, 182 i16* %b, 183 i16* %c) { 184 entry: 185 br label %for.body 186 187 for.body: ; preds = %for.body, %entry 188 %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] 189 %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ] 190 191 %add = add nuw nsw i64 %ind, 1 192 %store_ind_inc = add nuw nsw i64 %store_ind, 1 193 %store_ind_next = add nuw nsw i64 %store_ind_inc, 1 194 195 %arrayidxA = getelementptr i16, i16* %a, i64 %ind 196 %loadA = load i16, i16* %arrayidxA, align 2 197 198 %arrayidxA1 = getelementptr i16, i16* %a, i64 %add 199 %loadA1 = load i16, i16* %arrayidxA1, align 2 200 201 %arrayidxB = getelementptr i16, i16* %b, i64 %ind 202 %loadB = load i16, i16* %arrayidxB, align 2 203 204 %mul = mul i16 %loadA, %loadA1 205 %mul1 = mul i16 %mul, %loadB 206 207 %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind 208 store i16 %mul1, i16* %arrayidxC, align 2 209 210 %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc 211 store i16 %mul, i16* %arrayidxC1, align 2 212 213 %exitcond = icmp eq i64 %add, 20 214 br i1 %exitcond, label %for.end, label %for.body 215 216 for.end: ; preds = %for.body 217 ret void 218 } 219 220 ; Don't merge pointers if we need to perform a check against a pointer 221 ; to the same underlying object (doing so would emit a check that could be 222 ; falsely invalidated) For example, in the following loop: 223 ; 224 ; for (i = 0; i < 5000; ++i) 225 ; a[i + offset] = a[i] + a[i + 10000] 226 ; 227 ; we should not merge the intervals associated with the reads (0,5000) and 228 ; (10000, 15000) into (0, 15000) as this will pottentially fail the check 229 ; against the interval associated with the write. 230 ; 231 ; We cannot have this check unless ShouldRetryWithRuntimeCheck is set, 232 ; and therefore the grouping algorithm would create a separate group for 233 ; each pointer. 234 235 ; CHECK: function 'testi': 236 ; CHECK: Run-time memory checks: 237 ; CHECK-NEXT: Check 0: 238 ; CHECK-NEXT: Comparing group ([[ZERO:.+]]): 239 ; CHECK-NEXT: %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind 240 ; CHECK-NEXT: Against group ([[ONE:.+]]): 241 ; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %ind 242 ; CHECK-NEXT: Check 1: 243 ; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]): 244 ; CHECK-NEXT: %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind 245 ; CHECK-NEXT: Against group ([[TWO:.+]]): 246 ; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2 247 ; CHECK-NEXT: Grouped accesses: 248 ; CHECK-NEXT: Group {{.*}}[[ZERO]]: 249 ; CHECK-NEXT: (Low: ((2 * %offset) + %a) High: (9998 + (2 * %offset) + %a)) 250 ; CHECK-NEXT: Member: {((2 * %offset) + %a),+,2}<nsw><%for.body> 251 ; CHECK-NEXT: Group {{.*}}[[ONE]]: 252 ; CHECK-NEXT: (Low: %a High: (9998 + %a)) 253 ; CHECK-NEXT: Member: {%a,+,2}<%for.body> 254 ; CHECK-NEXT: Group {{.*}}[[TWO]]: 255 ; CHECK-NEXT: (Low: (20000 + %a) High: (29998 + %a)) 256 ; CHECK-NEXT: Member: {(20000 + %a),+,2}<%for.body> 257 258 define void @testi(i16* %a, 259 i64 %offset) { 260 entry: 261 br label %for.body 262 263 for.body: ; preds = %for.body, %entry 264 %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] 265 %store_ind = phi i64 [ %offset, %entry ], [ %store_ind_inc, %for.body ] 266 267 %add = add nuw nsw i64 %ind, 1 268 %store_ind_inc = add nuw nsw i64 %store_ind, 1 269 270 %arrayidxA1 = getelementptr i16, i16* %a, i64 %ind 271 %ind2 = add nuw nsw i64 %ind, 10000 272 %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2 273 274 %loadA1 = load i16, i16* %arrayidxA1, align 2 275 %loadA2 = load i16, i16* %arrayidxA2, align 2 276 277 %addres = add i16 %loadA1, %loadA2 278 279 %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind 280 store i16 %addres, i16* %storeidx, align 2 281 282 %exitcond = icmp eq i64 %add, 5000 283 br i1 %exitcond, label %for.end, label %for.body 284 285 for.end: ; preds = %for.body 286 ret void 287 } 288