; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64
;
; Test patterns that require preserving and restoring flags.

@b = common global i8 0, align 1
@c = common global i32 0, align 4
@a = common global i8 0, align 1
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the second increment's flags are reused for the branch despite
; being clobbered by the third increment and the compare.
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movb b, %cl
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    incb %al
; X32-NEXT:    movb %al, b
; X32-NEXT:    incl c
; X32-NEXT:    sete %dl
; X32-NEXT:    movb a, %ah
; X32-NEXT:    movb %ah, %ch
; X32-NEXT:    incb %ch
; X32-NEXT:    cmpb %cl, %ah
; X32-NEXT:    sete d
; X32-NEXT:    movb %ch, a
; X32-NEXT:    testb %dl, %dl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:  # %bb.1: # %if.then
; X32-NEXT:    movsbl %al, %eax
; X32-NEXT:    pushl %eax
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:  .LBB0_2: # %if.end
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movb {{.*}}(%rip), %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    incb %al
; X64-NEXT:    movb %al, {{.*}}(%rip)
; X64-NEXT:    incl {{.*}}(%rip)
; X64-NEXT:    sete %sil
; X64-NEXT:    movb {{.*}}(%rip), %cl
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    incb %dl
; X64-NEXT:    cmpb %dil, %cl
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    movb %dl, {{.*}}(%rip)
; X64-NEXT:    testb %sil, %sil
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:  # %bb.1: # %if.then
; X64-NEXT:    pushq %rax
; X64-NEXT:    movsbl %al, %edi
; X64-NEXT:    callq external
; X64-NEXT:    addq $8, %rsp
; X64-NEXT:  .LBB0_2: # %if.end
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
entry:
  %bval = load i8, i8* @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, i8* @b
  %cval = load volatile i32, i32* @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, i32* @c
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, i8* @d
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
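; Calls clobber EFLAGS on x86, so the ZF from the increment cannot stay in the
; flags register across the call to @external. Instead it is materialized with
; setne into %bl (the low byte of the callee-saved %ebx/%rbx) and re-tested
; with testb once the call returns, as the checks below show.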
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %bl
; X32-NEXT:    pushl $42
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:    testb %bl, %bl
; X32-NEXT:    je .LBB1_1
; X32-NEXT:  # %bb.2: # %else
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
; X32-NEXT:  .LBB1_1: # %then
; X32-NEXT:    movl $64, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbx
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %bl
; X64-NEXT:    movl $42, %edi
; X64-NEXT:    callq external
; X64-NEXT:    testb %bl, %bl
; X64-NEXT:    je .LBB1_1
; X64-NEXT:  # %bb.2: # %else
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_1: # %then
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare void @external_a()
declare void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %al
; X32-NEXT:    incb a
; X32-NEXT:    sete d
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne external_b # TAILCALL
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64:       # %bb.0: # %entry
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %al
; X64-NEXT:    incb {{.*}}(%rip)
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    testb %al, %al
; X64-NEXT:    jne external_b # TAILCALL
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    jmp external_a # TAILCALL
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, i8* @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
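; In this function a single i64 signed compare feeds two selects, with a
; volatile store and a volatile load between them, so once the selects are
; expanded into branches the compare's flags must be threaded through several
; blocks of the loop body.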
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
; X32-LABEL: PR37100:
; X32:       # %bb.0: # %bb
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    pushl %edi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    .cfi_offset %esi, -20
; X32-NEXT:    .cfi_offset %edi, -16
; X32-NEXT:    .cfi_offset %ebx, -12
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    jmp .LBB3_1
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB3_5: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ebp
; X32-NEXT:  .LBB3_1: # %bb1
; X32-NEXT:    # =>This Inner Loop Header: Depth=1
; X32-NEXT:    movsbl %cl, %eax
; X32-NEXT:    movl %eax, %edx
; X32-NEXT:    sarl $31, %edx
; X32-NEXT:    cmpl %eax, %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    sbbl %edx, %eax
; X32-NEXT:    setl %al
; X32-NEXT:    setl %dl
; X32-NEXT:    movzbl %dl, %ebp
; X32-NEXT:    negl %ebp
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_3
; X32-NEXT:  # %bb.2: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %ch, %cl
; X32-NEXT:  .LBB3_3: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %cl, (%ebx)
; X32-NEXT:    movl (%edi), %edx
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_5
; X32-NEXT:  # %bb.4: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %edx, %ebp
; X32-NEXT:    jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq %rdx, %r10
; X64-NEXT:    jmp .LBB3_1
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_5: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:  .LBB3_1: # %bb1
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    setl %sil
; X64-NEXT:    negl %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    jl .LBB3_3
; X64-NEXT:  # %bb.2: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl %ecx, %edi
; X64-NEXT:  .LBB3_3: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movb %dil, (%r8)
; X64-NEXT:    jl .LBB3_5
; X64-NEXT:  # %bb.4: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl (%r9), %esi
; X64-NEXT:    jmp .LBB3_5
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 0, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern that lowers to the post-RA pseudo used
; to turn SETB into an SBB, in order to make sure that kind of use of a copied
; EFLAGS continues to work.
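; Concretely, the 'sub i8 0, (zext i1 ...)' below selects to that pseudo
; (SETB_C8r at the time this test was written), which expands after register
; allocation to 'sbbb %cl, %cl' / 'sbbb %dl, %dl', producing 0 or -1 from the
; carry flag, as the checks show.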
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
; X32-LABEL: PR37431:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %esi, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    movl %eax, %ecx
; X32-NEXT:    sarl $31, %ecx
; X32-NEXT:    cmpl %eax, %eax
; X32-NEXT:    sbbl %ecx, %eax
; X32-NEXT:    setb %al
; X32-NEXT:    sbbb %cl, %cl
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movb %cl, (%edx)
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    xorl %ecx, %ecx
; X32-NEXT:    subl %eax, %ecx
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ecx
; X32-NEXT:    movb %dl, (%esi)
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl
;
; X64-LABEL: PR37431:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movslq (%rdi), %rax
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    sbbb %dl, %dl
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    movb %dl, (%rsi)
; X64-NEXT:    sbbl %esi, %esi
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:    movb %dl, (%rcx)
; X64-NEXT:    retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, undef
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 0, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}