; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA

; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF

; External callees. The tests in this file call them between a cmpxchg and the
; use of its success flag, so the flag has to stay live across a call.
declare i32 @foo()
declare i32 @bar(i64)

; In the following case when using fast scheduling we get a long chain of
; EFLAGS save/restore due to a sequence of:
;   cmpxchg8b (implicit-def eflags)
;   eax = copy eflags
;   adjcallstackdown32
;   ...
;   use of eax
; During PEI the adjcallstackdown32 is replaced with the subl which
; clobbers eflags, effectively interfering with the liveness interval. However,
; we then promote these copies into independent conditions in GPRs, which avoids
; repeated saving and restoring logic and can be trivially managed by the
; register allocator.
; A 64-bit cmpxchg whose success bit feeds a branch that is only reached after
; an intervening call to @bar. The expected output materializes the condition
; into a byte register (setne) before the call and re-tests it (testb) after.
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; 32-GOOD-RA-LABEL: test_intervening_call:
; 32-GOOD-RA:       # %bb.0: # %entry
; 32-GOOD-RA-NEXT:    pushl %ebx
; 32-GOOD-RA-NEXT:    pushl %esi
; 32-GOOD-RA-NEXT:    pushl %eax
; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %edx
; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT:    lock cmpxchg8b (%esi)
; 32-GOOD-RA-NEXT:    setne %bl
; 32-GOOD-RA-NEXT:    subl $8, %esp
; 32-GOOD-RA-NEXT:    pushl %edx
; 32-GOOD-RA-NEXT:    pushl %eax
; 32-GOOD-RA-NEXT:    calll bar
; 32-GOOD-RA-NEXT:    addl $16, %esp
; 32-GOOD-RA-NEXT:    testb %bl, %bl
; 32-GOOD-RA-NEXT:    jne .LBB0_3
; 32-GOOD-RA-NEXT:  # %bb.1: # %t
; 32-GOOD-RA-NEXT:    movl $42, %eax
; 32-GOOD-RA-NEXT:    jmp .LBB0_2
; 32-GOOD-RA-NEXT:  .LBB0_3: # %f
; 32-GOOD-RA-NEXT:    xorl %eax, %eax
; 32-GOOD-RA-NEXT:  .LBB0_2: # %t
; 32-GOOD-RA-NEXT:    xorl %edx, %edx
; 32-GOOD-RA-NEXT:    addl $4, %esp
; 32-GOOD-RA-NEXT:    popl %esi
; 32-GOOD-RA-NEXT:    popl %ebx
; 32-GOOD-RA-NEXT:    retl
;
; 32-FAST-RA-LABEL: test_intervening_call:
; 32-FAST-RA:       # %bb.0: # %entry
; 32-FAST-RA-NEXT:    pushl %ebx
; 32-FAST-RA-NEXT:    pushl %esi
; 32-FAST-RA-NEXT:    pushl %eax
; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %edx
; 32-FAST-RA-NEXT:    lock cmpxchg8b (%esi)
; 32-FAST-RA-NEXT:    setne %bl
; 32-FAST-RA-NEXT:    subl $8, %esp
; 32-FAST-RA-NEXT:    pushl %edx
; 32-FAST-RA-NEXT:    pushl %eax
; 32-FAST-RA-NEXT:    calll bar
; 32-FAST-RA-NEXT:    addl $16, %esp
; 32-FAST-RA-NEXT:    testb %bl, %bl
; 32-FAST-RA-NEXT:    jne .LBB0_3
; 32-FAST-RA-NEXT:  # %bb.1: # %t
; 32-FAST-RA-NEXT:    movl $42, %eax
; 32-FAST-RA-NEXT:    jmp .LBB0_2
; 32-FAST-RA-NEXT:  .LBB0_3: # %f
; 32-FAST-RA-NEXT:    xorl %eax, %eax
; 32-FAST-RA-NEXT:  .LBB0_2: # %t
; 32-FAST-RA-NEXT:    xorl %edx, %edx
; 32-FAST-RA-NEXT:    addl $4, %esp
; 32-FAST-RA-NEXT:    popl %esi
; 32-FAST-RA-NEXT:    popl %ebx
; 32-FAST-RA-NEXT:    retl
;
; 64-ALL-LABEL: test_intervening_call:
; 64-ALL:       # %bb.0: # %entry
; 64-ALL-NEXT:    pushq %rbx
; 64-ALL-NEXT:    movq %rsi, %rax
; 64-ALL-NEXT:    lock cmpxchgq %rdx, (%rdi)
; 64-ALL-NEXT:    setne %bl
; 64-ALL-NEXT:    movq %rax, %rdi
; 64-ALL-NEXT:    callq bar
; 64-ALL-NEXT:    testb %bl, %bl
; 64-ALL-NEXT:    jne .LBB0_2
; 64-ALL-NEXT:  # %bb.1: # %t
; 64-ALL-NEXT:    movl $42, %eax
; 64-ALL-NEXT:    popq %rbx
; 64-ALL-NEXT:    retq
; 64-ALL-NEXT:  .LBB0_2: # %f
; 64-ALL-NEXT:    xorl %eax, %eax
; 64-ALL-NEXT:    popq %rbx
; 64-ALL-NEXT:    retq
entry:
  ; %v is the value loaded by the cmpxchg, %p is its success bit.
  %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
  %v = extractvalue { i64, i1 } %cx, 0
  %p = extractvalue { i64, i1 } %cx, 1
  ; The call sits between the definition of %p and its only use below.
  call i32 @bar(i64 %v)
  br i1 %p, label %t, label %f

t:
  ret i64 42

f:
  ret i64 0
}

; This case is interesting in that it produces a clobber without any function
; calls.
; Nested loops with no calls: the inner loop spins while the tracked value is
; zero, then a cmpxchg on %p decides whether to exit or to reload and retry.
; Returns %i when %i <= %j (signed), otherwise 0 once the cmpxchg succeeds.
define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind {
; 32-ALL-LABEL: test_control_flow:
; 32-ALL:       # %bb.0: # %entry
; 32-ALL-NEXT:    movl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT:    jle .LBB1_6
; 32-ALL-NEXT:  # %bb.1: # %loop_start
; 32-ALL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; 32-ALL-NEXT:    .p2align 4, 0x90
; 32-ALL-NEXT:  .LBB1_2: # %while.condthread-pre-split.i
; 32-ALL-NEXT:    # =>This Loop Header: Depth=1
; 32-ALL-NEXT:    # Child Loop BB1_3 Depth 2
; 32-ALL-NEXT:    movl (%ecx), %edx
; 32-ALL-NEXT:    .p2align 4, 0x90
; 32-ALL-NEXT:  .LBB1_3: # %while.cond.i
; 32-ALL-NEXT:    # Parent Loop BB1_2 Depth=1
; 32-ALL-NEXT:    # => This Inner Loop Header: Depth=2
; 32-ALL-NEXT:    movl %edx, %eax
; 32-ALL-NEXT:    xorl %edx, %edx
; 32-ALL-NEXT:    testl %eax, %eax
; 32-ALL-NEXT:    je .LBB1_3
; 32-ALL-NEXT:  # %bb.4: # %while.body.i
; 32-ALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
; 32-ALL-NEXT:    lock cmpxchgl %eax, (%ecx)
; 32-ALL-NEXT:    jne .LBB1_2
; 32-ALL-NEXT:  # %bb.5:
; 32-ALL-NEXT:    xorl %eax, %eax
; 32-ALL-NEXT:  .LBB1_6: # %cond.end
; 32-ALL-NEXT:    retl
;
; 64-ALL-LABEL: test_control_flow:
; 64-ALL:       # %bb.0: # %entry
; 64-ALL-NEXT:    cmpl %edx, %esi
; 64-ALL-NEXT:    jle .LBB1_5
; 64-ALL-NEXT:    .p2align 4, 0x90
; 64-ALL-NEXT:  .LBB1_1: # %while.condthread-pre-split.i
; 64-ALL-NEXT:    # =>This Loop Header: Depth=1
; 64-ALL-NEXT:    # Child Loop BB1_2 Depth 2
; 64-ALL-NEXT:    movl (%rdi), %ecx
; 64-ALL-NEXT:    .p2align 4, 0x90
; 64-ALL-NEXT:  .LBB1_2: # %while.cond.i
; 64-ALL-NEXT:    # Parent Loop BB1_1 Depth=1
; 64-ALL-NEXT:    # => This Inner Loop Header: Depth=2
; 64-ALL-NEXT:    movl %ecx, %eax
; 64-ALL-NEXT:    xorl %ecx, %ecx
; 64-ALL-NEXT:    testl %eax, %eax
; 64-ALL-NEXT:    je .LBB1_2
; 64-ALL-NEXT:  # %bb.3: # %while.body.i
; 64-ALL-NEXT:    # in Loop: Header=BB1_1 Depth=1
; 64-ALL-NEXT:    lock cmpxchgl %eax, (%rdi)
; 64-ALL-NEXT:    jne .LBB1_1
; 64-ALL-NEXT:  # %bb.4:
; 64-ALL-NEXT:    xorl %esi, %esi
; 64-ALL-NEXT:  .LBB1_5: # %cond.end
; 64-ALL-NEXT:    movl %esi, %eax
; 64-ALL-NEXT:    retq
entry:
  %cmp = icmp sgt i32 %i, %j
  br i1 %cmp, label %loop_start, label %cond.end

loop_start:
  br label %while.condthread-pre-split.i

while.condthread-pre-split.i:
  ; Outer loop header: (re)load the current value before entering the inner loop.
  %.pr.i = load i32, i32* %p, align 4
  br label %while.cond.i

while.cond.i:
  ; Inner loop: on the back edge the phi takes the constant 0, so the loop
  ; repeats for as long as the tracked value is zero.
  %0 = phi i32 [ %.pr.i, %while.condthread-pre-split.i ], [ 0, %while.cond.i ]
  %tobool.i = icmp eq i32 %0, 0
  br i1 %tobool.i, label %while.cond.i, label %while.body.i

while.body.i:
  ; The cmpxchg success bit feeds the branch directly, with no call in between.
  %.lcssa = phi i32 [ %0, %while.cond.i ]
  %1 = cmpxchg i32* %p, i32 %.lcssa, i32 %.lcssa seq_cst seq_cst
  %2 = extractvalue { i32, i1 } %1, 1
  br i1 %2, label %cond.end.loopexit, label %while.condthread-pre-split.i

cond.end.loopexit:
  br label %cond.end

cond.end:
  %cond = phi i32 [ %i, %entry ], [ 0, %cond.end.loopexit ]
  ret i32 %cond
}

; This one is an interesting case because CMOV doesn't have a chain
; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
; The cmpxchg success bit selects between %new and the result of a call to
; @foo made after the cmpxchg. The expected output saves the condition with
; sete before the call and re-tests it afterwards (branch on i386, cmovnel on
; x86-64).
define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
; 32-GOOD-RA-LABEL: test_feed_cmov:
; 32-GOOD-RA:       # %bb.0: # %entry
; 32-GOOD-RA-NEXT:    pushl %ebx
; 32-GOOD-RA-NEXT:    pushl %esi
; 32-GOOD-RA-NEXT:    pushl %eax
; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT:    lock cmpxchgl %esi, (%ecx)
; 32-GOOD-RA-NEXT:    sete %bl
; 32-GOOD-RA-NEXT:    calll foo
; 32-GOOD-RA-NEXT:    testb %bl, %bl
; 32-GOOD-RA-NEXT:    jne .LBB2_2
; 32-GOOD-RA-NEXT:  # %bb.1: # %entry
; 32-GOOD-RA-NEXT:    movl %eax, %esi
; 32-GOOD-RA-NEXT:  .LBB2_2: # %entry
; 32-GOOD-RA-NEXT:    movl %esi, %eax
; 32-GOOD-RA-NEXT:    addl $4, %esp
; 32-GOOD-RA-NEXT:    popl %esi
; 32-GOOD-RA-NEXT:    popl %ebx
; 32-GOOD-RA-NEXT:    retl
;
; 32-FAST-RA-LABEL: test_feed_cmov:
; 32-FAST-RA:       # %bb.0: # %entry
; 32-FAST-RA-NEXT:    pushl %ebx
; 32-FAST-RA-NEXT:    pushl %esi
; 32-FAST-RA-NEXT:    pushl %eax
; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT:    lock cmpxchgl %esi, (%ecx)
; 32-FAST-RA-NEXT:    sete %bl
; 32-FAST-RA-NEXT:    calll foo
; 32-FAST-RA-NEXT:    testb %bl, %bl
; 32-FAST-RA-NEXT:    jne .LBB2_2
; 32-FAST-RA-NEXT:  # %bb.1: # %entry
; 32-FAST-RA-NEXT:    movl %eax, %esi
; 32-FAST-RA-NEXT:  .LBB2_2: # %entry
; 32-FAST-RA-NEXT:    movl %esi, %eax
; 32-FAST-RA-NEXT:    addl $4, %esp
; 32-FAST-RA-NEXT:    popl %esi
; 32-FAST-RA-NEXT:    popl %ebx
; 32-FAST-RA-NEXT:    retl
;
; 64-ALL-LABEL: test_feed_cmov:
; 64-ALL:       # %bb.0: # %entry
; 64-ALL-NEXT:    pushq %rbp
; 64-ALL-NEXT:    pushq %rbx
; 64-ALL-NEXT:    pushq %rax
; 64-ALL-NEXT:    movl %edx, %ebx
; 64-ALL-NEXT:    movl %esi, %eax
; 64-ALL-NEXT:    lock cmpxchgl %edx, (%rdi)
; 64-ALL-NEXT:    sete %bpl
; 64-ALL-NEXT:    callq foo
; 64-ALL-NEXT:    testb %bpl, %bpl
; 64-ALL-NEXT:    cmovnel %ebx, %eax
; 64-ALL-NEXT:    addq $8, %rsp
; 64-ALL-NEXT:    popq %rbx
; 64-ALL-NEXT:    popq %rbp
; 64-ALL-NEXT:    retq
entry:
  ; Only the success bit of the cmpxchg result is used.
  %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
  %success = extractvalue { i32, i1 } %res, 1

  ; The call sits between the definition of %success and its use in the select.
  %rhs = call i32 @foo()

  %ret = select i1 %success, i32 %new, i32 %rhs
  ret i32 %ret
}