; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64

; The peephole optimizer can elide some physical register copies, such as
; copies of EFLAGS. Make sure the flags are used directly, instead of
; needlessly saving and restoring specific conditions.

@L = external global i32
@M = external global i8

declare i32 @bar(i64)

; Adds 1 to @L in memory, then branches to %exit2 (returning false) only when
; the new value is zero and bit 3 (mask 8) of @M is set; otherwise returns
; true. The expected assembly for both targets places the conditional jump
; directly after the inc, with no separate compare of the stored value.
define i1 @plus_one() nounwind {
; CHECK32-LABEL: plus_one:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    incl L
; CHECK32-NEXT:    jne .LBB0_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB0_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB0_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: plus_one:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    incl {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB0_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB0_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB0_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, 1 ; N.B. will emit inc.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

; Same shape as @plus_one, but the addend is 42, so the expected assembly uses
; a memory-destination add (not inc) immediately followed by the conditional
; jump.
define i1 @plus_forty_two() nounwind {
; CHECK32-LABEL: plus_forty_two:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    addl $42, L
; CHECK32-NEXT:    jne .LBB1_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB1_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB1_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: plus_forty_two:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    addl $42, {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB1_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB1_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB1_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, 42 ; N.B. won't emit inc.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

; Same shape as @plus_one, but the addend is -1, so the expected assembly uses
; a memory-destination dec immediately followed by the conditional jump.
define i1 @minus_one() nounwind {
; CHECK32-LABEL: minus_one:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    decl L
; CHECK32-NEXT:    jne .LBB2_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB2_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB2_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: minus_one:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    decl {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB2_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB2_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB2_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, -1 ; N.B. will emit dec.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

; Same shape as @plus_one, but the addend is -42, so the expected assembly uses
; a memory-destination add of $-42 (not dec) immediately followed by the
; conditional jump.
define i1 @minus_forty_two() nounwind {
; CHECK32-LABEL: minus_forty_two:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    addl $-42, L
; CHECK32-NEXT:    jne .LBB3_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB3_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB3_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: minus_forty_two:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    addl $-42, {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB3_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB3_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB3_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, -42 ; N.B. won't emit dec.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

; Performs a 64-bit compare-and-swap, passes the loaded value to @bar (whose
; result is discarded), and only afterwards branches on the cmpxchg success
; bit: returns 42 on success, 0 on failure. The expected assembly captures the
; condition with setne %bl before the call and re-tests it with testb after
; the call returns.
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; CHECK32-LABEL: test_intervening_call:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    pushl %ebx
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    lock cmpxchg8b (%esi)
; CHECK32-NEXT:    setne %bl
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    pushl %edx
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    calll bar
; CHECK32-NEXT:    addl $16, %esp
; CHECK32-NEXT:    testb %bl, %bl
; CHECK32-NEXT:    jne .LBB4_3
; CHECK32-NEXT:  # %bb.1: # %t
; CHECK32-NEXT:    movl $42, %eax
; CHECK32-NEXT:    jmp .LBB4_2
; CHECK32-NEXT:  .LBB4_3: # %f
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:  .LBB4_2: # %t
; CHECK32-NEXT:    xorl %edx, %edx
; CHECK32-NEXT:    addl $4, %esp
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    popl %ebx
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: test_intervening_call:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    pushq %rbx
; CHECK64-NEXT:    movq %rsi, %rax
; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi)
; CHECK64-NEXT:    setne %bl
; CHECK64-NEXT:    movq %rax, %rdi
; CHECK64-NEXT:    callq bar
; CHECK64-NEXT:    testb %bl, %bl
; CHECK64-NEXT:    jne .LBB4_2
; CHECK64-NEXT:  # %bb.1: # %t
; CHECK64-NEXT:    movl $42, %eax
; CHECK64-NEXT:    popq %rbx
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB4_2: # %f
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    popq %rbx
; CHECK64-NEXT:    retq
entry:
  ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS.
  %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
  %v = extractvalue { i64, i1 } %cx, 0
  %p = extractvalue { i64, i1 } %cx, 1
  call i32 @bar(i64 %v)
  br i1 %p, label %t, label %f

t:
  ret i64 42

f:
  ret i64 0
}

; Performs two 64-bit compare-and-swaps and branches on the AND of their
; success bits: returns 42 when both succeeded, 0 otherwise. Both conditions
; are live at the branch, so the expected assembly materializes each one with
; setne/sete; on i386 the first is additionally spilled to the stack and
; reloaded with cmpb.
define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i64 %bar1, i64 %baz1) nounwind {
; CHECK32-LABEL: test_two_live_flags:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    pushl %ebp
; CHECK32-NEXT:    pushl %ebx
; CHECK32-NEXT:    pushl %edi
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    lock cmpxchg8b (%esi)
; CHECK32-NEXT:    setne {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl %edi, %edx
; CHECK32-NEXT:    movl %ebp, %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    lock cmpxchg8b (%esi)
; CHECK32-NEXT:    sete %al
; CHECK32-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
; CHECK32-NEXT:    jne .LBB5_4
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    testb %al, %al
; CHECK32-NEXT:    je .LBB5_4
; CHECK32-NEXT:  # %bb.2: # %t
; CHECK32-NEXT:    movl $42, %eax
; CHECK32-NEXT:    jmp .LBB5_3
; CHECK32-NEXT:  .LBB5_4: # %f
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:  .LBB5_3: # %t
; CHECK32-NEXT:    xorl %edx, %edx
; CHECK32-NEXT:    addl $4, %esp
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    popl %edi
; CHECK32-NEXT:    popl %ebx
; CHECK32-NEXT:    popl %ebp
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: test_two_live_flags:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movq %rsi, %rax
; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi)
; CHECK64-NEXT:    setne %dl
; CHECK64-NEXT:    movq %r8, %rax
; CHECK64-NEXT:    lock cmpxchgq %r9, (%rcx)
; CHECK64-NEXT:    sete %al
; CHECK64-NEXT:    testb %dl, %dl
; CHECK64-NEXT:    jne .LBB5_3
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    testb %al, %al
; CHECK64-NEXT:    je .LBB5_3
; CHECK64-NEXT:  # %bb.2: # %t
; CHECK64-NEXT:    movl $42, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB5_3: # %f
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
entry:
  %cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst
  %p0 = extractvalue { i64, i1 } %cx0, 1
  %cx1 = cmpxchg i64* %foo1, i64 %bar1, i64 %baz1 seq_cst seq_cst
  %p1 = extractvalue { i64, i1 } %cx1, 1
  %flag = and i1 %p0, %p1
  br i1 %flag, label %t, label %f

t:
  ret i64 42

f:
  ret i64 0
}

; Loads *%mem, computes (value > 0) as a signed compare, then runs an inline
; asm bsfl whose constraint string lists the condition codes/flags as
; clobbered, stores the asm result back to *%mem, and returns the compare
; result. The expected assembly materializes the compare with setg before the
; #APP region, so the returned value does not depend on flags after the asm.
define i1 @asm_clobbering_flags(i32* %mem) nounwind {
; CHECK32-LABEL: asm_clobbering_flags:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl (%ecx), %edx
; CHECK32-NEXT:    testl %edx, %edx
; CHECK32-NEXT:    setg %al
; CHECK32-NEXT:    #APP
; CHECK32-NEXT:    bsfl %edx, %edx
; CHECK32-NEXT:    #NO_APP
; CHECK32-NEXT:    movl %edx, (%ecx)
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: asm_clobbering_flags:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movl (%rdi), %ecx
; CHECK64-NEXT:    testl %ecx, %ecx
; CHECK64-NEXT:    setg %al
; CHECK64-NEXT:    #APP
; CHECK64-NEXT:    bsfl %ecx, %ecx
; CHECK64-NEXT:    #NO_APP
; CHECK64-NEXT:    movl %ecx, (%rdi)
; CHECK64-NEXT:    retq
entry:
  %val = load i32, i32* %mem, align 4
  %cmp = icmp sgt i32 %val, 0
  %res = tail call i32 asm "bsfl $1,$0", "=r,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %val)
  store i32 %res, i32* %mem, align 4
  ret i1 %cmp
}