Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
      3 ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
      4 
      5 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
      6 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
      7 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
      8 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
      9 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
     10 
     11 declare i32 @foo()
     12 declare i32 @bar(i64)
     13 
     14 ; In the following case when using fast scheduling we get a long chain of
     15 ; EFLAGS save/restore due to a sequence of:
     16 ; cmpxchg8b (implicit-def eflags)
     17 ; eax = copy eflags
     18 ; adjcallstackdown32
     19 ; ...
     20 ; use of eax
     21 ; During PEI the adjcallstackdown32 is replaced with the subl, which
     22 ; clobbers eflags, effectively interfering with the live interval. However,
     23 ; we then promote these copies into independent conditions in GPRs, which avoids
     24 ; repeated saving and restoring logic and can be trivially managed by the
     25 ; register allocator.
     26 define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
     27 ; 32-GOOD-RA-LABEL: test_intervening_call:
     28 ; 32-GOOD-RA:       # %bb.0: # %entry
     29 ; 32-GOOD-RA-NEXT:    pushl %ebx
     30 ; 32-GOOD-RA-NEXT:    pushl %esi
     31 ; 32-GOOD-RA-NEXT:    pushl %eax
     32 ; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
     33 ; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %edx
     34 ; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ebx
     35 ; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     36 ; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
     37 ; 32-GOOD-RA-NEXT:    lock cmpxchg8b (%esi)
     38 ; 32-GOOD-RA-NEXT:    setne %bl
     39 ; 32-GOOD-RA-NEXT:    subl $8, %esp
     40 ; 32-GOOD-RA-NEXT:    pushl %edx
     41 ; 32-GOOD-RA-NEXT:    pushl %eax
     42 ; 32-GOOD-RA-NEXT:    calll bar
     43 ; 32-GOOD-RA-NEXT:    addl $16, %esp
     44 ; 32-GOOD-RA-NEXT:    testb %bl, %bl
     45 ; 32-GOOD-RA-NEXT:    jne .LBB0_3
     46 ; 32-GOOD-RA-NEXT:  # %bb.1: # %t
     47 ; 32-GOOD-RA-NEXT:    movl $42, %eax
     48 ; 32-GOOD-RA-NEXT:    jmp .LBB0_2
     49 ; 32-GOOD-RA-NEXT:  .LBB0_3: # %f
     50 ; 32-GOOD-RA-NEXT:    xorl %eax, %eax
     51 ; 32-GOOD-RA-NEXT:  .LBB0_2: # %t
     52 ; 32-GOOD-RA-NEXT:    xorl %edx, %edx
     53 ; 32-GOOD-RA-NEXT:    addl $4, %esp
     54 ; 32-GOOD-RA-NEXT:    popl %esi
     55 ; 32-GOOD-RA-NEXT:    popl %ebx
     56 ; 32-GOOD-RA-NEXT:    retl
     57 ;
     58 ; 32-FAST-RA-LABEL: test_intervening_call:
     59 ; 32-FAST-RA:       # %bb.0: # %entry
     60 ; 32-FAST-RA-NEXT:    pushl %ebx
     61 ; 32-FAST-RA-NEXT:    pushl %esi
     62 ; 32-FAST-RA-NEXT:    pushl %eax
     63 ; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
     64 ; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ebx
     65 ; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     66 ; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
     67 ; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %edx
     68 ; 32-FAST-RA-NEXT:    lock cmpxchg8b (%esi)
     69 ; 32-FAST-RA-NEXT:    setne %bl
     70 ; 32-FAST-RA-NEXT:    subl $8, %esp
     71 ; 32-FAST-RA-NEXT:    pushl %edx
     72 ; 32-FAST-RA-NEXT:    pushl %eax
     73 ; 32-FAST-RA-NEXT:    calll bar
     74 ; 32-FAST-RA-NEXT:    addl $16, %esp
     75 ; 32-FAST-RA-NEXT:    testb %bl, %bl
     76 ; 32-FAST-RA-NEXT:    jne .LBB0_3
     77 ; 32-FAST-RA-NEXT:  # %bb.1: # %t
     78 ; 32-FAST-RA-NEXT:    movl $42, %eax
     79 ; 32-FAST-RA-NEXT:    jmp .LBB0_2
     80 ; 32-FAST-RA-NEXT:  .LBB0_3: # %f
     81 ; 32-FAST-RA-NEXT:    xorl %eax, %eax
     82 ; 32-FAST-RA-NEXT:  .LBB0_2: # %t
     83 ; 32-FAST-RA-NEXT:    xorl %edx, %edx
     84 ; 32-FAST-RA-NEXT:    addl $4, %esp
     85 ; 32-FAST-RA-NEXT:    popl %esi
     86 ; 32-FAST-RA-NEXT:    popl %ebx
     87 ; 32-FAST-RA-NEXT:    retl
     88 ;
     89 ; 64-ALL-LABEL: test_intervening_call:
     90 ; 64-ALL:       # %bb.0: # %entry
     91 ; 64-ALL-NEXT:    pushq %rbx
     92 ; 64-ALL-NEXT:    movq %rsi, %rax
     93 ; 64-ALL-NEXT:    lock cmpxchgq %rdx, (%rdi)
     94 ; 64-ALL-NEXT:    setne %bl
     95 ; 64-ALL-NEXT:    movq %rax, %rdi
     96 ; 64-ALL-NEXT:    callq bar
     97 ; 64-ALL-NEXT:    testb %bl, %bl
     98 ; 64-ALL-NEXT:    jne .LBB0_2
     99 ; 64-ALL-NEXT:  # %bb.1: # %t
    100 ; 64-ALL-NEXT:    movl $42, %eax
    101 ; 64-ALL-NEXT:    popq %rbx
    102 ; 64-ALL-NEXT:    retq
    103 ; 64-ALL-NEXT:  .LBB0_2: # %f
    104 ; 64-ALL-NEXT:    xorl %eax, %eax
    105 ; 64-ALL-NEXT:    popq %rbx
    106 ; 64-ALL-NEXT:    retq
        ; IR under test: a 64-bit cmpxchg whose success bit is consumed only after
        ; an intervening call to @bar, so the condition must outlive the call.
        ; Every check prefix above expects the condition to be captured with
        ; `setne %bl` right after the locked cmpxchg and re-tested with
        ; `testb %bl, %bl` once the call has returned.
    107 entry:
    108   %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
          ; Field 0 of the cmpxchg result is the value read from memory; field 1
          ; is the i1 success flag.
    109   %v = extractvalue { i64, i1 } %cx, 0
    110   %p = extractvalue { i64, i1 } %cx, 1
          ; The call is placed between the cmpxchg and the only use of %p.
    111   call i32 @bar(i64 %v)
    112   br i1 %p, label %t, label %f
    113 
          ; Exchange succeeded: return 42.
    114 t:
    115   ret i64 42
    116 
          ; Exchange failed: return 0.
    117 f:
    118   ret i64 0
    119 }
    120 
    121 ; This case is interesting because it produces a clobber without any function calls.
    122 define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind {
    123 ; 32-ALL-LABEL: test_control_flow:
    124 ; 32-ALL:       # %bb.0: # %entry
    125 ; 32-ALL-NEXT:    movl {{[0-9]+}}(%esp), %eax
    126 ; 32-ALL-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
    127 ; 32-ALL-NEXT:    jle .LBB1_6
    128 ; 32-ALL-NEXT:  # %bb.1: # %loop_start
    129 ; 32-ALL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    130 ; 32-ALL-NEXT:    .p2align 4, 0x90
    131 ; 32-ALL-NEXT:  .LBB1_2: # %while.condthread-pre-split.i
    132 ; 32-ALL-NEXT:    # =>This Loop Header: Depth=1
    133 ; 32-ALL-NEXT:    # Child Loop BB1_3 Depth 2
    134 ; 32-ALL-NEXT:    movl (%ecx), %edx
    135 ; 32-ALL-NEXT:    .p2align 4, 0x90
    136 ; 32-ALL-NEXT:  .LBB1_3: # %while.cond.i
    137 ; 32-ALL-NEXT:    # Parent Loop BB1_2 Depth=1
    138 ; 32-ALL-NEXT:    # => This Inner Loop Header: Depth=2
    139 ; 32-ALL-NEXT:    movl %edx, %eax
    140 ; 32-ALL-NEXT:    xorl %edx, %edx
    141 ; 32-ALL-NEXT:    testl %eax, %eax
    142 ; 32-ALL-NEXT:    je .LBB1_3
    143 ; 32-ALL-NEXT:  # %bb.4: # %while.body.i
    144 ; 32-ALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
    145 ; 32-ALL-NEXT:    lock cmpxchgl %eax, (%ecx)
    146 ; 32-ALL-NEXT:    jne .LBB1_2
    147 ; 32-ALL-NEXT:  # %bb.5:
    148 ; 32-ALL-NEXT:    xorl %eax, %eax
    149 ; 32-ALL-NEXT:  .LBB1_6: # %cond.end
    150 ; 32-ALL-NEXT:    retl
    151 ;
    152 ; 64-ALL-LABEL: test_control_flow:
    153 ; 64-ALL:       # %bb.0: # %entry
    154 ; 64-ALL-NEXT:    cmpl %edx, %esi
    155 ; 64-ALL-NEXT:    jle .LBB1_5
    156 ; 64-ALL-NEXT:    .p2align 4, 0x90
    157 ; 64-ALL-NEXT:  .LBB1_1: # %while.condthread-pre-split.i
    158 ; 64-ALL-NEXT:    # =>This Loop Header: Depth=1
    159 ; 64-ALL-NEXT:    # Child Loop BB1_2 Depth 2
    160 ; 64-ALL-NEXT:    movl (%rdi), %ecx
    161 ; 64-ALL-NEXT:    .p2align 4, 0x90
    162 ; 64-ALL-NEXT:  .LBB1_2: # %while.cond.i
    163 ; 64-ALL-NEXT:    # Parent Loop BB1_1 Depth=1
    164 ; 64-ALL-NEXT:    # => This Inner Loop Header: Depth=2
    165 ; 64-ALL-NEXT:    movl %ecx, %eax
    166 ; 64-ALL-NEXT:    xorl %ecx, %ecx
    167 ; 64-ALL-NEXT:    testl %eax, %eax
    168 ; 64-ALL-NEXT:    je .LBB1_2
    169 ; 64-ALL-NEXT:  # %bb.3: # %while.body.i
    170 ; 64-ALL-NEXT:    # in Loop: Header=BB1_1 Depth=1
    171 ; 64-ALL-NEXT:    lock cmpxchgl %eax, (%rdi)
    172 ; 64-ALL-NEXT:    jne .LBB1_1
    173 ; 64-ALL-NEXT:  # %bb.4:
    174 ; 64-ALL-NEXT:    xorl %esi, %esi
    175 ; 64-ALL-NEXT:  .LBB1_5: # %cond.end
    176 ; 64-ALL-NEXT:    movl %esi, %eax
    177 ; 64-ALL-NEXT:    retq
        ; IR under test: a cmpxchg retry loop with nested control flow and no
        ; calls. The checks above expect the success condition to be consumed by
        ; a `jne` placed immediately after the `lock cmpxchgl`.
    178 entry:
          ; When %i is not greater than %j, skip the loop; cond.end then returns %i.
    179   %cmp = icmp sgt i32 %i, %j
    180   br i1 %cmp, label %loop_start, label %cond.end
    181 
    182 loop_start:
    183   br label %while.condthread-pre-split.i
    184 
          ; Outer loop header: (re)load the current value at %p.
    185 while.condthread-pre-split.i:
    186   %.pr.i = load i32, i32* %p, align 4
    187   br label %while.cond.i
    188 
          ; Inner loop: repeats while the tracked value is zero. The back edge
          ; feeds the constant 0 into the phi rather than a fresh load.
    189 while.cond.i:
    190   %0 = phi i32 [ %.pr.i, %while.condthread-pre-split.i ], [ 0, %while.cond.i ]
    191   %tobool.i = icmp eq i32 %0, 0
    192   br i1 %tobool.i, label %while.cond.i, label %while.body.i
    193 
          ; Attempt the exchange using the same value as both the expected and the
          ; new operand. On success leave the loop; on failure go back to the
          ; outer header and reload.
    194 while.body.i:
    195   %.lcssa = phi i32 [ %0, %while.cond.i ]
    196   %1 = cmpxchg i32* %p, i32 %.lcssa, i32 %.lcssa seq_cst seq_cst
    197   %2 = extractvalue { i32, i1 } %1, 1
    198   br i1 %2, label %cond.end.loopexit, label %while.condthread-pre-split.i
    199 
    200 cond.end.loopexit:
    201   br label %cond.end
    202 
          ; Result is %i when the loop was skipped, 0 when it exited via a
          ; successful cmpxchg.
    203 cond.end:
    204   %cond = phi i32 [ %i, %entry ], [ 0, %cond.end.loopexit ]
    205   ret i32 %cond
    206 }
    207 
    208 ; This one is an interesting case because CMOV doesn't have a chain
    209 ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
    210 define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
    211 ; 32-GOOD-RA-LABEL: test_feed_cmov:
    212 ; 32-GOOD-RA:       # %bb.0: # %entry
    213 ; 32-GOOD-RA-NEXT:    pushl %ebx
    214 ; 32-GOOD-RA-NEXT:    pushl %esi
    215 ; 32-GOOD-RA-NEXT:    pushl %eax
    216 ; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
    217 ; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
    218 ; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    219 ; 32-GOOD-RA-NEXT:    lock cmpxchgl %esi, (%ecx)
    220 ; 32-GOOD-RA-NEXT:    sete %bl
    221 ; 32-GOOD-RA-NEXT:    calll foo
    222 ; 32-GOOD-RA-NEXT:    testb %bl, %bl
    223 ; 32-GOOD-RA-NEXT:    jne .LBB2_2
    224 ; 32-GOOD-RA-NEXT:  # %bb.1: # %entry
    225 ; 32-GOOD-RA-NEXT:    movl %eax, %esi
    226 ; 32-GOOD-RA-NEXT:  .LBB2_2: # %entry
    227 ; 32-GOOD-RA-NEXT:    movl %esi, %eax
    228 ; 32-GOOD-RA-NEXT:    addl $4, %esp
    229 ; 32-GOOD-RA-NEXT:    popl %esi
    230 ; 32-GOOD-RA-NEXT:    popl %ebx
    231 ; 32-GOOD-RA-NEXT:    retl
    232 ;
    233 ; 32-FAST-RA-LABEL: test_feed_cmov:
    234 ; 32-FAST-RA:       # %bb.0: # %entry
    235 ; 32-FAST-RA-NEXT:    pushl %ebx
    236 ; 32-FAST-RA-NEXT:    pushl %esi
    237 ; 32-FAST-RA-NEXT:    pushl %eax
    238 ; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    239 ; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi
    240 ; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax
    241 ; 32-FAST-RA-NEXT:    lock cmpxchgl %esi, (%ecx)
    242 ; 32-FAST-RA-NEXT:    sete %bl
    243 ; 32-FAST-RA-NEXT:    calll foo
    244 ; 32-FAST-RA-NEXT:    testb %bl, %bl
    245 ; 32-FAST-RA-NEXT:    jne .LBB2_2
    246 ; 32-FAST-RA-NEXT:  # %bb.1: # %entry
    247 ; 32-FAST-RA-NEXT:    movl %eax, %esi
    248 ; 32-FAST-RA-NEXT:  .LBB2_2: # %entry
    249 ; 32-FAST-RA-NEXT:    movl %esi, %eax
    250 ; 32-FAST-RA-NEXT:    addl $4, %esp
    251 ; 32-FAST-RA-NEXT:    popl %esi
    252 ; 32-FAST-RA-NEXT:    popl %ebx
    253 ; 32-FAST-RA-NEXT:    retl
    254 ;
    255 ; 64-ALL-LABEL: test_feed_cmov:
    256 ; 64-ALL:       # %bb.0: # %entry
    257 ; 64-ALL-NEXT:    pushq %rbp
    258 ; 64-ALL-NEXT:    pushq %rbx
    259 ; 64-ALL-NEXT:    pushq %rax
    260 ; 64-ALL-NEXT:    movl %edx, %ebx
    261 ; 64-ALL-NEXT:    movl %esi, %eax
    262 ; 64-ALL-NEXT:    lock cmpxchgl %edx, (%rdi)
    263 ; 64-ALL-NEXT:    sete %bpl
    264 ; 64-ALL-NEXT:    callq foo
    265 ; 64-ALL-NEXT:    testb %bpl, %bpl
    266 ; 64-ALL-NEXT:    cmovnel %ebx, %eax
    267 ; 64-ALL-NEXT:    addq $8, %rsp
    268 ; 64-ALL-NEXT:    popq %rbx
    269 ; 64-ALL-NEXT:    popq %rbp
    270 ; 64-ALL-NEXT:    retq
        ; IR under test: the cmpxchg success bit feeds a select whose other
        ; operand comes from a call made after the cmpxchg. The checks above
        ; expect the condition to be captured with `sete` before the call and
        ; re-tested with `testb` afterwards; the 64-bit output then uses
        ; `cmovnel`, while the 32-bit output uses a branch around a `movl`.
    271 entry:
    272   %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
          ; Only the i1 success flag (field 1) of the result is used.
    273   %success = extractvalue { i32, i1 } %res, 1
    274 
          ; The call sits between the cmpxchg and the select that consumes %success.
    275   %rhs = call i32 @foo()
    276 
          ; Yield %new when the exchange succeeded, otherwise the value returned
          ; by @foo.
    277   %ret = select i1 %success, i32 %new, i32 %rhs
    278   ret i32 %ret
    279 }
    280