; RUN: opt -S -loop-sink < %s | FileCheck %s
; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s

; Regression tests for sinking a loop-invariant load from the loop preheader
; into colder blocks of the loop, driven by the !prof metadata below.
;
; Every function's expectations are anchored with a FileCheck label directive
; on its "@name(" definition. A bare substring such as "t4" would also match an
; SSA value like %t4 in the preceding function, which silently shortens the
; range of the negative checks that come before it.

@g = global i32 0, align 4

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Sink load to b2
; CHECK-LABEL: @t1(
; CHECK: .b2:
; CHECK: load i32, i32* @g
; CHECK: .b3:
; CHECK-NOT: load i32, i32* @g
define i32 @t1(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, 100
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 500
; b1: 16016
; b3: 8
; b6: 8
; Sink load to b3 and b6
; CHECK-LABEL: @t2(
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g
; CHECK: .b3:
; CHECK: load i32, i32* @g
; CHECK: .b4:
; CHECK: .b6:
; CHECK: load i32, i32* @g
; CHECK: .b7:
define i32 @t2(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, %invariant
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 500
; b3: 8
; b5: 16008
; Do not sink load from preheader.
; CHECK-LABEL: @t3(
; CHECK: .preheader:
; CHECK: load i32, i32* @g
; CHECK: .b1:
; CHECK-NOT: load i32, i32* @g
define i32 @t3(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, %invariant
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, 5
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; For a single-BB loop with <=1 average trip count, sink the load to b1.
; CHECK-LABEL: @t4(
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g
; CHECK: .b1:
; CHECK: load i32, i32* @g
; CHECK: .exit:
define i32 @t4(i32, i32) #0 !prof !0 {
.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t1, %.b1 ], [ 0, %.preheader ]
  %t1 = add nsw i32 %invariant, %iv
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b1, label %.exit, !prof !1

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; The loop contains a call to @foo() (in b6) that may write to @g, so the
; load must not be sunk.
; CHECK-LABEL: @t5(
; CHECK: .preheader:
; CHECK: load i32, i32* @g
; CHECK: .b1:
; CHECK-NOT: load i32, i32* @g
define i32 @t5(i32, i32*) #0 !prof !0 {
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Regardless of the possibly-writing call in the loop, this load from constant
; memory can be sunk.
; CHECK-LABEL: @t5_const_memory(
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g_const
; CHECK: .b2:
; CHECK: load i32, i32* @g_const
; CHECK: br i1 %c2, label %.b3, label %.b4
define i32 @t5_const_memory(i32, i32*) #0 !prof !0 {
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g_const
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; preheader: 1000
; b2: 15
; b3: 7
; Do not sink the unordered atomic load of @g to b2.
; CHECK-LABEL: @t6(
; CHECK: .preheader:
; CHECK: load atomic i32, i32* @g unordered, align 4
; CHECK: .b2:
; CHECK-NOT: load atomic i32, i32* @g unordered, align 4
define i32 @t6(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load atomic i32, i32* @g unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

@g_const = constant i32 0, align 4

;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; preheader: 1000
; b2: 0.5
; b3: 999.5
; Sink the unordered atomic load to b2: an unordered load from constant memory
; is allowed to be sunk into the loop.
; CHECK-LABEL: @t7(
; CHECK: .preheader:
; CHECK-NOT: load atomic i32, i32* @g_const unordered, align 4
; CHECK: .b2:
; CHECK: load atomic i32, i32* @g_const unordered, align 4
define i32 @t7(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load atomic i32, i32* @g_const unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; Declared without memory attributes, so a call to it may write memory.
declare i32 @foo()

; Profile data: function entry count and the branch weights referenced by the
; !prof attachments above.
!0 = !{!"function_entry_count", i64 1}
!1 = !{!"branch_weights", i32 1, i32 2000}
!2 = !{!"branch_weights", i32 2000, i32 1}
!3 = !{!"branch_weights", i32 100, i32 1}