; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; This test checks that the x86-cmov-converter optimization transforms CMOV
;; instructions into branches when it is profitable.
;; There are 5 cases below:
;;   1. CmovInHotPath:
;;        CMOV depends on the condition and it is in the hot path.
;;        Thus, it is worth transforming.
;;
;;   2. CmovNotInHotPath:
;;        Similar to case (1), except that the CMOV is not in the hot path.
;;        Thus, it is not worth transforming.
;;
;;   3. MaxIndex:
;;        Maximum-calculation algorithm that looks for the max index;
;;        calculating the CMOV value is cheaper than calculating the CMOV
;;        condition. Thus, it is worth transforming.
;;
;;   4. MaxValue:
;;        Maximum-calculation algorithm that looks for the max value;
;;        calculating the CMOV value is not cheaper than calculating the CMOV
;;        condition. Thus, it is not worth transforming.
;;
;;   5. BinarySearch:
;;        Usually, a binary-search CMOV is not predictable.
;;        Thus, it is not worth transforming.
;;
;; Test was created using the following command line:
;;    > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
;; Where foo.c is:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i] + 1;
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = (c[i] + 1) * t;
;;  }
;;}
;;
;;
;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;    d[i] /= b;
;;  }
;;}
;;
;;
;;int MaxIndex(int n, int *a) {
;;  int t = 0;
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > a[t])
;;      t = i;
;;  }
;;  return a[t];
;;}
;;
;;
;;int MaxValue(int n, int *a) {
;;  int t = a[0];
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > t)
;;      t = a[i];
;;  }
;;  return t;
;;}
;;
;;typedef struct Node Node;
;;struct Node {
;;  unsigned Val;
;;  Node *Right;
;;  Node *Left;
;;};
;;
;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
;;  while (Curr->Val > Next->Val) {
;;    Curr = Next;
;;    if (Mask & (0x1 << Curr->Val))
;;      Next = Curr->Right;
;;    else
;;      Next = Curr->Left;
;;  }
;;  return Curr->Val;
;;}
;;
;;
;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;  }
;;}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%struct.Node = type { i32, %struct.Node*, %struct.Node* }

; CHECK-LABEL: CmovInHotPath
; CHECK-NOT: cmov
; CHECK: jg

define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
entry:
  %cmp14 = icmp sgt i32 %n, 0
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, 1
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  %. = select i1 %cmp3, i32 10, i32 %add
  %mul7 = mul nsw i32 %., %add
  store i32 %mul7, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; CHECK-LABEL: CmovNotInHotPath
; CHECK: cmovg

define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
entry:
  %cmp18 = icmp sgt i32 %n, 0
  br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  %. = select i1 %cmp3, i32 10, i32 %0
  store i32 %., i32* %arrayidx, align 4
  %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx7, align 4
  %div = sdiv i32 %1, %b
  store i32 %div, i32* %arrayidx7, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; CHECK-LABEL: MaxIndex
; CHECK-NOT: cmov
; CHECK: jg

define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
entry:
  %cmp14 = icmp sgt i32 %n, 1
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup.loopexit: ; preds = %for.body
  %phitmp = sext i32 %i.0.t.0 to i64
  br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
  %t.0.lcssa = phi i64 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %t.0.lcssa
  %0 = load i32, i32* %arrayidx5, align 4
  ret i32 %0

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx, align 4
  %idxprom1 = sext i32 %t.015 to i64
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
  %2 = load i32, i32* %arrayidx2, align 4
  %cmp3 = icmp sgt i32 %1, %2
  %3 = trunc i64 %indvars.iv to i32
  %i.0.t.0 = select i1 %cmp3, i32 %3, i32 %t.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; CHECK-LABEL: MaxValue
; CHECK-NOT: jg
; CHECK: cmovg

define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
entry:
  %0 = load i32, i32* %a, align 4
  %cmp13 = icmp sgt i32 %n, 1
  br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx1, align 4
  %cmp2 = icmp sgt i32 %1, %t.014
  %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; CHECK-LABEL: BinarySearch
; CHECK: set

define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
entry:
  %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
  %0 = load i32, i32* %Val8, align 8
  %Val19 = getelementptr inbounds %struct.Node, %struct.Node* %Next, i64 0, i32 0
  %1 = load i32, i32* %Val19, align 8
  %cmp10 = icmp ugt i32 %0, %1
  br i1 %cmp10, label %while.body, label %while.end

while.body: ; preds = %entry, %while.body
  %2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
  %Next.addr.011 = phi %struct.Node* [ %3, %while.body ], [ %Next, %entry ]
  %shl = shl i32 1, %2
  %and = and i32 %shl, %Mask
  %tobool = icmp eq i32 %and, 0
  %Left = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 2
  %Right = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 1
  %Left.sink = select i1 %tobool, %struct.Node** %Left, %struct.Node** %Right
  %3 = load %struct.Node*, %struct.Node** %Left.sink, align 8
  %Val1 = getelementptr inbounds %struct.Node, %struct.Node* %3, i64 0, i32 0
  %4 = load i32, i32* %Val1, align 8
  %cmp = icmp ugt i32 %2, %4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
  ret i32 %.lcssa
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The following test checks that the x86-cmov-converter optimization transforms
;; CMOV instructions into branches correctly.
;;
;; MBB:
;;   cond = cmp ...
;;   v1 = CMOVgt t1, f1, cond
;;   v2 = CMOVle s1, f2, cond
;;
;; Where: t1 = 11, f1 = 22, f2 = a
;;
;; After CMOV transformation
;; -------------------------
;; MBB:
;;   cond = cmp ...
;;   ja %SinkMBB
;;
;; FalseMBB:
;;   jmp %SinkMBB
;;
;; SinkMBB:
;;   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
;;   %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
;;                                          ; true-value with false-value
;;                                          ; Phi instruction cannot use
;;                                          ; previous Phi instruction result
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; CHECK-LABEL: Transform
; CHECK-NOT: cmov
; CHECK: divl [[a:%[0-9a-z]*]]
; CHECK: movl $11, [[s1:%[0-9a-z]*]]
; CHECK: movl [[a]], [[s2:%[0-9a-z]*]]
; CHECK: cmpl [[a]], %edx
; CHECK: ja [[SinkBB:.*]]
; CHECK: [[FalseBB:.*]]:
; CHECK: movl $22, [[s1]]
; CHECK: movl $22, [[s2]]
; CHECK: [[SinkBB]]:
; CHECK: ja

define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
entry:
  %cmp10 = icmp ugt i32 0, %n
  br i1 %cmp10, label %while.body, label %while.end

while.body: ; preds = %entry, %while.body
  %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
  %arr_i = getelementptr inbounds i32, i32* %arr, i32 %i
  %x = load i32, i32* %arr_i, align 4
  %div = udiv i32 %x, %a
  %cond = icmp ugt i32 %div, %a
  %condOpp = icmp ule i32 %div, %a
  %s1 = select i1 %cond, i32 11, i32 22
  %s2 = select i1 %condOpp, i32 %s1, i32 %a
  %sum = urem i32 %s1, %s2
  store i32 %sum, i32* %arr_i, align 4
  %i_inc = add i32 %i, 1
  %cmp = icmp ugt i32 %i_inc, %n
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; Test that we will always convert a cmov with a memory operand into a branch,
; even outside of a loop.
define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
; CHECK-LABEL: test_cmov_memoperand:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %load = load i32, i32* %y
  %z = select i1 %cond, i32 %x, i32 %load
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movl (%r{{..}}), %[[R:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: movl %[[R]], %
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand.
define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %y = load i32, i32* %y.ptr
  %z1 = select i1 %cond, i32 %x, i32 %a
  %z2 = select i1 %cond, i32 %x, i32 %y
  %z3 = select i1 %cond, i32 %x, i32 %b
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK-DAG: movl %{{.*}}, %[[R1:.*]]
; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]]
; CHECK-DAG: movl %{{.*}} %[[R3:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: addl
; CHECK-DAG: %[[R1]]
; CHECK-DAG: ,
; CHECK-DAG: %[[R3]]
; CHECK-DAG: addl
; CHECK-DAG: %[[R2]]
; CHECK-DAG: ,
; CHECK-DAG: %[[R3]]
; CHECK: movl %[[R3]], %eax
; CHECK: retq
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Same as before, but with operands reversed in the select with a load.
define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group2:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %y = load i32, i32* %y.ptr
  %z2 = select i1 %cond, i32 %a, i32 %x
  %z1 = select i1 %cond, i32 %y, i32 %x
  %z3 = select i1 %cond, i32 %b, i32 %x
; CHECK-NOT: cmov
; CHECK: jbe [[FALSE_BB:.*]]
; CHECK-DAG: movl %{{.*}}, %[[R1:.*]]
; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]]
; CHECK-DAG: movl %{{.*}} %[[R3:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: addl
; CHECK-DAG: %[[R1]]
; CHECK-DAG: ,
; CHECK-DAG: %[[R3]]
; CHECK-DAG: addl
; CHECK-DAG: %[[R2]]
; CHECK-DAG: ,
; CHECK-DAG: %[[R3]]
; CHECK: movl %[[R3]], %eax
; CHECK: retq
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Test that we don't convert a group of cmovs with conflicting directions of
; loads.
define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %y1 = load i32, i32* %y1.ptr
  %y2 = load i32, i32* %y2.ptr
  %z1 = select i1 %cond, i32 %x, i32 %y1
  %z2 = select i1 %cond, i32 %y2, i32 %x
; CHECK: cmoval
; CHECK: cmoval
  %s1 = add i32 %z1, %z2
  ret i32 %s1
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov in
; the group.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %p = select i1 %cond, i32* %x, i32* %y
  %load = load i32, i32* %p
  %z = select i1 %cond, i32 %a, i32 %load
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movl (%r{{..}}), %[[R:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: movl %[[R]], %eax
; CHECK: retq
  ret i32 %z
}

; Test that we can convert a group of two cmovs with memory operands where one
; uses the result of the other as part of the address.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %load1 = load i32*, i32** %y
  %p = select i1 %cond, i32* %x, i32* %load1
  %load2 = load i32, i32* %p
  %z = select i1 %cond, i32 %a, i32 %load2
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movq (%r{{..}}), %[[R1:.*]]
; CHECK: movl (%[[R1]]), %[[R2:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: movl %[[R2]], %eax
; CHECK: retq
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov and
; where that cmov gets *its* input from a prior cmov in the group.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %p = select i1 %cond, i32* %x, i32* %y
  %p2 = select i1 %cond, i32* %z, i32* %p
  %load = load i32, i32* %p2
  %r = select i1 %cond, i32 %a, i32 %load
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movl (%r{{..}}), %[[R:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: movl %[[R]], %eax
; CHECK: retq
  ret i32 %r
}

attributes #0 = {"target-cpu"="x86-64"}