; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; Baseline partial redundancy: block3 stores to %p while block2 does not, so
; the load in block4 is only available along one incoming edge.  The checks
; expect a load of %p to appear in block2 and a phi to replace the load in
; block4.
define i32 @test1(i32* %p, i1 %C) {
; CHECK-LABEL: @test1(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %p

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32, i32* %p
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32
; CHECK-NEXT: ret i32
}

; Simple phi translation: the loaded pointer is itself a phi of %p and %q, so
; the load expected in block2 must use %q.
define i32 @test2(i32* %p, i32* %q, i1 %C) {
; CHECK-LABEL: @test2(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %q

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %PRE = load i32, i32* %P2
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; PRE that needs phi translation through a GEP.  The translated address
; (%q + 1) already exists as %B in block1, and the checks expect the load
; placed in block2 to use %B.
define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test3(
block1:
  %B = getelementptr i32, i32* %q, i32 1
  store i32* %B, i32** %Hack
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32, i32* %B

block3:
  %A = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32, i32* %P2, i32 1
  %PRE = load i32, i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;; Here the loaded address is available, but the computation is in 'block3'
;; which does not dominate 'block2'.
define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test4(
; The GEP of %q (%B) is computed only in block3, so it cannot be reused from
; block2.  The checks still expect a load in block2 and no load in block4.
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32, i32*
; CHECK:   br label %block4

block3:
  %B = getelementptr i32, i32* %q, i32 1
  store i32* %B, i32** %Hack

  %A = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32, i32* %P2, i32 1
  %PRE = load i32, i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test5(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j] = G[j] + G[j+1];
;}

; Loop form of the C code above.  The loop body loads G[indvar] and
; G[indvar+1] and stores the sum to G[indvar].  The checks expect one load in
; the preheader (bb.nph) and exactly one load left inside the loop.
define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test5(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double, double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double, double* %G, i64 %tmp6
  %scevgep7 = getelementptr double, double* %G, i64 %indvar
  %2 = load double, double* %scevgep7, align 8
  %3 = load double, double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep7, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test6(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; Same loop shape as test5, except the sum is stored to G[indvar+1] rather
; than G[indvar].  The expectations are unchanged: one load in the preheader
; and a single load remaining in the loop.
define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test6(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double, double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double, double* %G, i64 %tmp6
  %scevgep7 = getelementptr double, double* %G, i64 %indvar
  %2 = load double, double* %scevgep7, align 8
  %3 = load double, double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test7(int N, double* G) {
;  long j;
;  G[1] = 1;
;  for (j = 1; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test7(
; The entry block stores 1.0 to G[1] before the loop.  Inside the loop the
; two loaded addresses are G[indvar+1] and G[indvar+2], both formed through
; 'add' instructions on the induction variable.  The checks expect a single
; load to remain in the loop body.
entry:
  %0 = getelementptr inbounds double, double* %G, i64 1
  store double 1.000000e+00, double* %0, align 8
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double, double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double, double* %G, i64 %tmp9
  %3 = load double, double* %scevgep10, align 8
  %4 = load double, double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;; Here the loaded address isn't available in 'block2' at all, requiring a new
;; GEP to be inserted into it.
define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test8(
; No GEP of %q exists anywhere in this function, so the address for the load
; expected in block2 has to be materialized there.
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32, i32*
; CHECK:   br label %block4

block3:
  %A = getelementptr i32, i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32, i32* %P2, i32 1
  %PRE = load i32, i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test9(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test9(
entry:
  ; Unnamed placeholder instruction: it takes implicit value number %0 so the
  ; explicitly numbered %1 and %2 that follow are in sequence.
  add i32 0, 0
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

; CHECK: bb.nph:
; CHECK:   load double, double*
; CHECK:   br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double, double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double, double* %G, i64 %tmp9
  %3 = load double, double* %scevgep10, align 8
  %4 = load double, double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test10(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;    G[j] = G[j] + G[j+1] + G[j-1];
;}

; PR5501
; The loop body reads three neighbouring elements (indvar, indvar+1 and
; indvar+2) and writes the middle one.  The checks expect two loads in the
; preheader and only one load left in the loop.
define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test10(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 1
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %0 to i64
  %tmp8 = add i64 %tmp, -1
  br label %bb
; CHECK: bb.nph:
; CHECK:   load double, double*
; CHECK:   load double, double*
; CHECK:   br label %bb


bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
  %scevgep = getelementptr double, double* %G, i64 %indvar
  %tmp9 = add i64 %indvar, 2
  %scevgep10 = getelementptr double, double* %G, i64 %tmp9
  %tmp11 = add i64 %indvar, 1
  %scevgep12 = getelementptr double, double* %G, i64 %tmp11
  %2 = load double, double* %scevgep12, align 8
  %3 = load double, double* %scevgep10, align 8
  %4 = fadd double %2, %3
  %5 = load double, double* %scevgep, align 8
  %6 = fadd double %4, %5
  store double %6, double* %scevgep12, align 8
  %exitcond = icmp eq i64 %tmp11, %tmp8
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double, double*
; CHECK-NOT: load double, double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; Test critical edge splitting.
define i32 @test11(i32* %p, i1 %C, i32 %N) {
; CHECK-LABEL: @test11(
; block2 ends in a conditional branch to block4 or block5, and block4 is also
; reached from block3, so the block2 -> block4 edge is critical.  The checks
; expect a load of %p followed directly by an unconditional branch to block4,
; and a phi at the top of block4.
block1:
  br i1 %C, label %block2, label %block3

block2:
  %cond = icmp sgt i32 %N, 1
  br i1 %cond, label %block4, label %block5
; CHECK: load i32, i32* %p
; CHECK-NEXT: br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32, i32* %p
  br label %block5

block5:
  %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
  ret i32 %ret
; CHECK: block4:
; CHECK-NEXT: phi i32
}

; External declarations used by test12: two opaque callees and the
; personality routine.
declare void @f()
declare void @g(i32)
declare i32 @__CxxFrameHandler3(...)

; Test that loads aren't PRE'd into EH pads.
; cleanup2 is reached from both the catchswitch and the 'cleanup' pad, and
; only 'cleanup' stores to %p.  The checks require cleanup2 to keep its
; original load and to gain no phi.
define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
; CHECK-LABEL: @test12(
block1:
  invoke void @f()
          to label %block2 unwind label %catch.dispatch

block2:
  invoke void @f()
          to label %block3 unwind label %cleanup

block3:
  ret void

catch.dispatch:
  %cs1 = catchswitch within none [label %catch] unwind label %cleanup2

catch:
  %c = catchpad within %cs1 []
  catchret from %c to label %block2

cleanup:
  %c1 = cleanuppad within none []
  store i32 0, i32* %p
  cleanupret from %c1 unwind label %cleanup2

; CHECK: cleanup2:
; CHECK-NOT: phi
; CHECK-NEXT: %c2 = cleanuppad within none []
; CHECK-NEXT: %NOTPRE = load i32, i32* %p
cleanup2:
  %c2 = cleanuppad within none []
  %NOTPRE = load i32, i32* %p
  call void @g(i32 %NOTPRE)
  cleanupret from %c2 unwind to caller
}