; This tests the optimization where producers and consumers of i1 (bool)
; variables are combined to implicitly use flags instead of explicitly using
; stack or register variables.
;
; Two configurations are exercised: the default target, verified with the
; default check prefix, and (when that target is available) arm32, verified
; with the ARM32 prefix.

; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
; RUN:   -allow-externally-defined-symbols | FileCheck %s

; RUN: %if --need=target_ARM32 --command %p2i --filetype=obj \
; RUN:   --target arm32 -i %s --disassemble --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_ARM32 --command FileCheck %s \
; RUN:   --check-prefix=ARM32

; External helper; the tests call it to place an unrelated instruction
; between the producer and the consumer of a bool.
declare void @use_value(i32)

; Basic cmp/branch folding.
; The icmp result is used only by the branch in the same block, so the
; expected output is a compare followed directly by a conditional jump.
define internal i32 @fold_cmp_br(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: fold_cmp_br
; CHECK: cmp
; CHECK: jge
; ARM32-LABEL: fold_cmp_br
; ARM32: cmp r0, r1
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr


; Cmp/branch folding with intervening instructions.
; A call sits between the icmp and the branch. The default-target checks
; expect no cmp before the call and the cmp/jge pair after it.
define internal i32 @fold_cmp_br_intervening_insts(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  call void @use_value(i32 %arg1)
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: fold_cmp_br_intervening_insts
; CHECK-NOT: cmp
; CHECK: call
; CHECK: cmp
; CHECK: jge
; ARM32-LABEL: fold_cmp_br_intervening_insts
; ARM32: push {{[{].*[}]}}
; ARM32: bl{{.*}}use_value
; ARM32: cmp {{r[0-9]+}}, {{r[0-9]+}}
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr


; Cmp/branch non-folding because of live-out.
; %cmp1 is produced in the entry block but consumed in the next block, so the
; checks expect the bool to be materialized (cmp + set on the default target,
; cmp + movlt on arm32) and then tested again before the branch.
define internal i32 @no_fold_cmp_br_liveout(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  br label %next
next:
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: no_fold_cmp_br_liveout
; CHECK: cmp
; CHECK: set
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_liveout
; ARM32: cmp
; ARM32: movlt [[REG:r[0-9]+]]
; ARM32: tst [[REG]], #1
; ARM32: beq


; Cmp/branch non-folding because of extra non-whitelisted uses.
; %cmp1 feeds a zext in addition to the branch, so the checks expect the bool
; to be materialized and then tested again before the branch.
define internal i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = zext i1 %cmp1 to i32
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 %result
branch2:
  ret i32 2
}

; CHECK-LABEL: no_fold_cmp_br_non_whitelist
; CHECK: cmp
; CHECK: set
; CHECK: movzx
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_non_whitelist
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[R]], #1
; ARM32: tst [[R]], #1
; ARM32: beq
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr


; Basic cmp/select folding.
; The icmp result is used only by the select, so the checks expect a compare
; followed by a conditional move.
define internal i32 @fold_cmp_select(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = select i1 %cmp1, i32 %arg1, i32 %arg2
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select
; ARM32: cmp r0, r1
; ARM32: movlt {{r[0-9]+}}, r0

; 64-bit cmp/select folding.
; The compare is 32-bit (on the truncated arguments) while the selected values
; are i64, so the checks expect one compare followed by two conditional moves.
define internal i64 @fold_cmp_select_64(i64 %arg1, i64 %arg2) {
entry:
  %arg1_trunc = trunc i64 %arg1 to i32
  %arg2_trunc = trunc i64 %arg2 to i32
  %cmp1 = icmp slt i32 %arg1_trunc, %arg2_trunc
  %result = select i1 %cmp1, i64 %arg1, i64 %arg2
  ret i64 %result
}

; CHECK-LABEL: fold_cmp_select_64
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64
; ARM32: cmp r0, r2
; ARM32: movlt [[LOW:r[0-9]+]], r0
; ARM32: movlt [[HIGH:r[0-9]+]], r1
; ARM32: mov r0, [[LOW]]
; ARM32: mov r1, [[HIGH]]
; ARM32: bx lr


; 64-bit cmp/select folding where one icmp operand and one select operand are
; undef. The checks still expect a compare and two conditional moves.
define internal i64 @fold_cmp_select_64_undef(i64 %arg1) {
entry:
  %arg1_trunc = trunc i64 %arg1 to i32
  %cmp1 = icmp slt i32 undef, %arg1_trunc
  %result = select i1 %cmp1, i64 %arg1, i64 undef
  ret i64 %result
}
; CHECK-LABEL: fold_cmp_select_64_undef
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64_undef
; ARM32: mov
; ARM32: rsbs r{{[0-9]+}}, r{{[0-9]+}}, #0
; ARM32: movlt
; ARM32: movlt
; ARM32: bx lr


; Cmp/select folding with intervening instructions.
; A call sits between the icmp and the select. The default-target checks
; expect no cmp before the call and the cmp/cmovl pair after it.
define internal i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  call void @use_value(i32 %arg1)
  %result = select i1 %cmp1, i32 %arg1, i32 %arg2
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select_intervening_insts
; CHECK-NOT: cmp
; CHECK: call
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_intervening_insts
; ARM32: bl{{.*}}use_value
; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: movlt
; ARM32: bx lr

; Cmp/multi-select folding.
; One icmp feeds three selects in the same block. The default-target checks
; expect the compare to be repeated before each conditional move. The arm32
; checks instead expect the bool to be materialized once (movlt ..., #1) and
; then tested with tst before each movne.
define internal i32 @fold_cmp_select_multi(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %partial = add i32 %a, %b
  %result = add i32 %partial, %c
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select_multi
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmp
; CHECK: cmovge
; CHECK: add
; CHECK: add
; ARM32-LABEL: fold_cmp_select_multi
; ARM32: mov
; ARM32: cmp
; ARM32: movlt {{.*}}, #1
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: tst {{.*}}, #1
; ARM32: movne {{.*}}, #123
; ARM32: bx lr


; Cmp/multi-select non-folding because of live-out.
; The third select is in a different block from the icmp, so the checks
; expect the bool to be materialized with a set instruction and each select to
; test that value (cmp + cmovne/cmove) instead of reusing the original flags.
define internal i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  br label %next
next:
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %partial = add i32 %a, %b
  %result = add i32 %partial, %c
  ret i32 %result
}

; CHECK-LABEL: no_fold_cmp_select_multi_liveout
; CHECK: set
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmove
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_liveout
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr

; Cmp/branch non-folding due to load folding and intervening store.
; One icmp operand comes from a load, and a store to the same address sits
; between the icmp and the branch. The checks expect the bool to be
; materialized (cmp + set) and tested again later. Only the default target has
; checks for this function.
define internal i32 @no_fold_cmp_br_store(i32 %arg2, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg1 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  store i32 1, i32* %addr, align 1
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: no_fold_cmp_br_store
; CHECK: cmp
; CHECK: set
; CHECK: cmp

; Cmp/select non-folding due to load folding and intervening store.
; Same shape as the previous test, but the consumer is a select. The checks
; expect cmp + setl, then the store, then a second cmp feeding cmovne.
define internal i32 @no_fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg2 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  store i32 1, i32* %addr, align 1
  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
  ret i32 %result
}

; CHECK-LABEL: no_fold_cmp_select_store
; CHECK: cmp
; CHECK: setl
; CHECK: mov DWORD PTR
; CHECK: cmp
; CHECK: cmovne

; Cmp/select folding combined with load folding when the store does not
; intervene: here the store comes after the select, so the checks expect a
; compare with a memory operand followed directly by cmovl.
define internal i32 @fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg2 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
  store i32 1, i32* %addr, align 1
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select_store
; CHECK: cmp {{.*}},DWORD PTR
; CHECK: cmovl

; Cmp/multi-select non-folding because of extra non-whitelisted uses.
; %cmp1 feeds a zext in addition to the three selects, so the checks expect
; the bool to be materialized with a set instruction and each select to test
; that value (cmp + cmovne/cmove) before the movzx and the adds.
define internal i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1,
                                                            i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %ext = zext i1 %cmp1 to i32
  %partial1 = add i32 %a, %b
  %partial2 = add i32 %partial1, %c
  %result = add i32 %partial2, %ext
  ret i32 %result
}

; CHECK-LABEL: no_fold_cmp_select_multi_non_whitelist
; CHECK: set
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmove
; CHECK: movzx
; CHECK: add
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr

; The remaining tests branch on and/or combinations of i1 values obtained by
; truncating the i32 arguments. Only arm32 expectations are given for them:
; each operand is expected to be tested with "tst rN, #1" followed by a
; conditional branch.

; Branch on (arg1 and arg2).
define internal i32 @br_i1_folding2_and(i32 %arg1, i32 %arg2) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1

  %t2 = and i1 %t0, %t1
  br i1 %t2, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding2_and
; ARM32: tst r0, #1
; ARM32: beq
; ARM32: tst r1, #1
; ARM32: beq

; Branch on (arg1 or arg2).
define internal i32 @br_i1_folding2_or(i32 %arg1, i32 %arg2) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1

  %t2 = or i1 %t0, %t1
  br i1 %t2, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding2_or
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq

; Branch on ((arg1 and arg2) or arg3).
define internal i32 @br_i1_folding3_and_or(i32 %arg1, i32 %arg2, i32 %arg3) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1

  %t3 = and i1 %t0, %t1
  %t4 = or i1 %t3, %t2

  br i1 %t4, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding3_and_or
; ARM32: tst r0, #1
; ARM32: beq
; ARM32: tst r1, #1
; ARM32: bne
; ARM32: tst r2, #1
; ARM32: beq

; Branch on ((arg1 or arg2) and arg3).
define internal i32 @br_i1_folding3_or_and(i32 %arg1, i32 %arg2, i32 %arg3) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1

  %t3 = or i1 %t0, %t1
  %t4 = and i1 %t3, %t2

  br i1 %t4, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding3_or_and
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq
; ARM32: tst r2, #1
; ARM32: beq

; Branch on (((arg1 or arg2) and arg3) or (arg4 and arg5)).
define internal i32 @br_i1_folding4(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
                                    i32 %arg5) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1
  %t3 = trunc i32 %arg4 to i1
  %t4 = trunc i32 %arg5 to i1

  %t5 = or i1 %t0, %t1
  %t6 = and i1 %t5, %t2
  %t7 = and i1 %t3, %t4
  %t8 = or i1 %t6, %t7
  br i1 %t8, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding4
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq
; ARM32: tst r2, #1
; ARM32: bne
; ARM32: tst r3, #1
; ARM32: beq [[TARGET:.*]]
; ARM32: tst r4, #1
; ARM32: beq [[TARGET]]