; Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
      3 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
      4 
      5 ; The peephole optimizer can elide some physical register copies such as
      6 ; EFLAGS. Make sure the flags are used directly, instead of needlessly
      7 ; saving and restoring specific conditions.
      8 
      9 @L = external global i32
     10 @M = external global i8
     11 
     12 declare i32 @bar(i64)
     13 
     14 define i1 @plus_one() nounwind {
     15 ; CHECK32-LABEL: plus_one:
     16 ; CHECK32:       # %bb.0: # %entry
     17 ; CHECK32-NEXT:    movb M, %al
     18 ; CHECK32-NEXT:    incl L
     19 ; CHECK32-NEXT:    jne .LBB0_2
     20 ; CHECK32-NEXT:  # %bb.1: # %entry
     21 ; CHECK32-NEXT:    andb $8, %al
     22 ; CHECK32-NEXT:    je .LBB0_2
     23 ; CHECK32-NEXT:  # %bb.3: # %exit2
     24 ; CHECK32-NEXT:    xorl %eax, %eax
     25 ; CHECK32-NEXT:    retl
     26 ; CHECK32-NEXT:  .LBB0_2: # %exit
     27 ; CHECK32-NEXT:    movb $1, %al
     28 ; CHECK32-NEXT:    retl
     29 ;
     30 ; CHECK64-LABEL: plus_one:
     31 ; CHECK64:       # %bb.0: # %entry
     32 ; CHECK64-NEXT:    movb {{.*}}(%rip), %al
     33 ; CHECK64-NEXT:    incl {{.*}}(%rip)
     34 ; CHECK64-NEXT:    jne .LBB0_2
     35 ; CHECK64-NEXT:  # %bb.1: # %entry
     36 ; CHECK64-NEXT:    andb $8, %al
     37 ; CHECK64-NEXT:    je .LBB0_2
     38 ; CHECK64-NEXT:  # %bb.3: # %exit2
     39 ; CHECK64-NEXT:    xorl %eax, %eax
     40 ; CHECK64-NEXT:    retq
     41 ; CHECK64-NEXT:  .LBB0_2: # %exit
     42 ; CHECK64-NEXT:    movb $1, %al
     43 ; CHECK64-NEXT:    retq
        ; Per the CHECK lines above, the `incl` sets ZF for the
        ; `icmp eq i32 %val, 0` below and the `jne` consumes that ZF directly —
        ; no setcc/flag spill is emitted between them.
     44 entry:
     45   %loaded_L = load i32, i32* @L
     46   %val = add nsw i32 %loaded_L, 1 ; N.B. will emit inc.
     47   store i32 %val, i32* @L
     48   %loaded_M = load i8, i8* @M
     49   %masked = and i8 %loaded_M, 8 ; isolate bit 3 of M
     50   %M_is_true = icmp ne i8 %masked, 0
     51   %L_is_false = icmp eq i32 %val, 0
     52   %cond = and i1 %L_is_false, %M_is_true
     53   br i1 %cond, label %exit2, label %exit
     54 
     55 exit:
     56   ret i1 true
     57 
     58 exit2:
     59   ret i1 false
     60 }
     61 
     62 define i1 @plus_forty_two() nounwind {
     63 ; CHECK32-LABEL: plus_forty_two:
     64 ; CHECK32:       # %bb.0: # %entry
     65 ; CHECK32-NEXT:    movb M, %al
     66 ; CHECK32-NEXT:    addl $42, L
     67 ; CHECK32-NEXT:    jne .LBB1_2
     68 ; CHECK32-NEXT:  # %bb.1: # %entry
     69 ; CHECK32-NEXT:    andb $8, %al
     70 ; CHECK32-NEXT:    je .LBB1_2
     71 ; CHECK32-NEXT:  # %bb.3: # %exit2
     72 ; CHECK32-NEXT:    xorl %eax, %eax
     73 ; CHECK32-NEXT:    retl
     74 ; CHECK32-NEXT:  .LBB1_2: # %exit
     75 ; CHECK32-NEXT:    movb $1, %al
     76 ; CHECK32-NEXT:    retl
     77 ;
     78 ; CHECK64-LABEL: plus_forty_two:
     79 ; CHECK64:       # %bb.0: # %entry
     80 ; CHECK64-NEXT:    movb {{.*}}(%rip), %al
     81 ; CHECK64-NEXT:    addl $42, {{.*}}(%rip)
     82 ; CHECK64-NEXT:    jne .LBB1_2
     83 ; CHECK64-NEXT:  # %bb.1: # %entry
     84 ; CHECK64-NEXT:    andb $8, %al
     85 ; CHECK64-NEXT:    je .LBB1_2
     86 ; CHECK64-NEXT:  # %bb.3: # %exit2
     87 ; CHECK64-NEXT:    xorl %eax, %eax
     88 ; CHECK64-NEXT:    retq
     89 ; CHECK64-NEXT:  .LBB1_2: # %exit
     90 ; CHECK64-NEXT:    movb $1, %al
     91 ; CHECK64-NEXT:    retq
        ; Same pattern as @plus_one but with a non-unit addend, so the CHECK
        ; lines expect `addl $42` (not `incl`) whose ZF is still consumed
        ; directly by the following `jne`.
     92 entry:
     93   %loaded_L = load i32, i32* @L
     94   %val = add nsw i32 %loaded_L, 42 ; N.B. won't emit inc.
     95   store i32 %val, i32* @L
     96   %loaded_M = load i8, i8* @M
     97   %masked = and i8 %loaded_M, 8 ; isolate bit 3 of M
     98   %M_is_true = icmp ne i8 %masked, 0
     99   %L_is_false = icmp eq i32 %val, 0
    100   %cond = and i1 %L_is_false, %M_is_true
    101   br i1 %cond, label %exit2, label %exit
    102 
    103 exit:
    104   ret i1 true
    105 
    106 exit2:
    107   ret i1 false
    108 }
    109 
    110 define i1 @minus_one() nounwind {
    111 ; CHECK32-LABEL: minus_one:
    112 ; CHECK32:       # %bb.0: # %entry
    113 ; CHECK32-NEXT:    movb M, %al
    114 ; CHECK32-NEXT:    decl L
    115 ; CHECK32-NEXT:    jne .LBB2_2
    116 ; CHECK32-NEXT:  # %bb.1: # %entry
    117 ; CHECK32-NEXT:    andb $8, %al
    118 ; CHECK32-NEXT:    je .LBB2_2
    119 ; CHECK32-NEXT:  # %bb.3: # %exit2
    120 ; CHECK32-NEXT:    xorl %eax, %eax
    121 ; CHECK32-NEXT:    retl
    122 ; CHECK32-NEXT:  .LBB2_2: # %exit
    123 ; CHECK32-NEXT:    movb $1, %al
    124 ; CHECK32-NEXT:    retl
    125 ;
    126 ; CHECK64-LABEL: minus_one:
    127 ; CHECK64:       # %bb.0: # %entry
    128 ; CHECK64-NEXT:    movb {{.*}}(%rip), %al
    129 ; CHECK64-NEXT:    decl {{.*}}(%rip)
    130 ; CHECK64-NEXT:    jne .LBB2_2
    131 ; CHECK64-NEXT:  # %bb.1: # %entry
    132 ; CHECK64-NEXT:    andb $8, %al
    133 ; CHECK64-NEXT:    je .LBB2_2
    134 ; CHECK64-NEXT:  # %bb.3: # %exit2
    135 ; CHECK64-NEXT:    xorl %eax, %eax
    136 ; CHECK64-NEXT:    retq
    137 ; CHECK64-NEXT:  .LBB2_2: # %exit
    138 ; CHECK64-NEXT:    movb $1, %al
    139 ; CHECK64-NEXT:    retq
        ; Mirror of @plus_one with a -1 addend: the CHECK lines expect `decl`,
        ; and its ZF is again consumed directly by the `jne` with no flag
        ; save/restore in between.
    140 entry:
    141   %loaded_L = load i32, i32* @L
    142   %val = add nsw i32 %loaded_L, -1 ; N.B. will emit dec.
    143   store i32 %val, i32* @L
    144   %loaded_M = load i8, i8* @M
    145   %masked = and i8 %loaded_M, 8 ; isolate bit 3 of M
    146   %M_is_true = icmp ne i8 %masked, 0
    147   %L_is_false = icmp eq i32 %val, 0
    148   %cond = and i1 %L_is_false, %M_is_true
    149   br i1 %cond, label %exit2, label %exit
    150 
    151 exit:
    152   ret i1 true
    153 
    154 exit2:
    155   ret i1 false
    156 }
    157 
    158 define i1 @minus_forty_two() nounwind {
    159 ; CHECK32-LABEL: minus_forty_two:
    160 ; CHECK32:       # %bb.0: # %entry
    161 ; CHECK32-NEXT:    movb M, %al
    162 ; CHECK32-NEXT:    addl $-42, L
    163 ; CHECK32-NEXT:    jne .LBB3_2
    164 ; CHECK32-NEXT:  # %bb.1: # %entry
    165 ; CHECK32-NEXT:    andb $8, %al
    166 ; CHECK32-NEXT:    je .LBB3_2
    167 ; CHECK32-NEXT:  # %bb.3: # %exit2
    168 ; CHECK32-NEXT:    xorl %eax, %eax
    169 ; CHECK32-NEXT:    retl
    170 ; CHECK32-NEXT:  .LBB3_2: # %exit
    171 ; CHECK32-NEXT:    movb $1, %al
    172 ; CHECK32-NEXT:    retl
    173 ;
    174 ; CHECK64-LABEL: minus_forty_two:
    175 ; CHECK64:       # %bb.0: # %entry
    176 ; CHECK64-NEXT:    movb {{.*}}(%rip), %al
    177 ; CHECK64-NEXT:    addl $-42, {{.*}}(%rip)
    178 ; CHECK64-NEXT:    jne .LBB3_2
    179 ; CHECK64-NEXT:  # %bb.1: # %entry
    180 ; CHECK64-NEXT:    andb $8, %al
    181 ; CHECK64-NEXT:    je .LBB3_2
    182 ; CHECK64-NEXT:  # %bb.3: # %exit2
    183 ; CHECK64-NEXT:    xorl %eax, %eax
    184 ; CHECK64-NEXT:    retq
    185 ; CHECK64-NEXT:  .LBB3_2: # %exit
    186 ; CHECK64-NEXT:    movb $1, %al
    187 ; CHECK64-NEXT:    retq
        ; Mirror of @plus_forty_two with a negative addend: the CHECK lines
        ; expect `addl $-42` (not `decl`), with ZF still consumed directly by
        ; the following `jne`.
    188 entry:
    189   %loaded_L = load i32, i32* @L
    190   %val = add nsw i32 %loaded_L, -42 ; N.B. won't emit dec.
    191   store i32 %val, i32* @L
    192   %loaded_M = load i8, i8* @M
    193   %masked = and i8 %loaded_M, 8 ; isolate bit 3 of M
    194   %M_is_true = icmp ne i8 %masked, 0
    195   %L_is_false = icmp eq i32 %val, 0
    196   %cond = and i1 %L_is_false, %M_is_true
    197   br i1 %cond, label %exit2, label %exit
    198 
    199 exit:
    200   ret i1 true
    201 
    202 exit2:
    203   ret i1 false
    204 }
    205 
    206 define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
    207 ; CHECK32-LABEL: test_intervening_call:
    208 ; CHECK32:       # %bb.0: # %entry
    209 ; CHECK32-NEXT:    pushl %ebx
    210 ; CHECK32-NEXT:    pushl %esi
    211 ; CHECK32-NEXT:    pushl %eax
    212 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    213 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
    214 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
    215 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    216 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
    217 ; CHECK32-NEXT:    lock cmpxchg8b (%esi)
    218 ; CHECK32-NEXT:    setne %bl
    219 ; CHECK32-NEXT:    subl $8, %esp
    220 ; CHECK32-NEXT:    pushl %edx
    221 ; CHECK32-NEXT:    pushl %eax
    222 ; CHECK32-NEXT:    calll bar
    223 ; CHECK32-NEXT:    addl $16, %esp
    224 ; CHECK32-NEXT:    testb %bl, %bl
    225 ; CHECK32-NEXT:    jne .LBB4_3
    226 ; CHECK32-NEXT:  # %bb.1: # %t
    227 ; CHECK32-NEXT:    movl $42, %eax
    228 ; CHECK32-NEXT:    jmp .LBB4_2
    229 ; CHECK32-NEXT:  .LBB4_3: # %f
    230 ; CHECK32-NEXT:    xorl %eax, %eax
    231 ; CHECK32-NEXT:  .LBB4_2: # %t
    232 ; CHECK32-NEXT:    xorl %edx, %edx
    233 ; CHECK32-NEXT:    addl $4, %esp
    234 ; CHECK32-NEXT:    popl %esi
    235 ; CHECK32-NEXT:    popl %ebx
    236 ; CHECK32-NEXT:    retl
    237 ;
    238 ; CHECK64-LABEL: test_intervening_call:
    239 ; CHECK64:       # %bb.0: # %entry
    240 ; CHECK64-NEXT:    pushq %rbx
    241 ; CHECK64-NEXT:    movq %rsi, %rax
    242 ; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi)
    243 ; CHECK64-NEXT:    setne %bl
    244 ; CHECK64-NEXT:    movq %rax, %rdi
    245 ; CHECK64-NEXT:    callq bar
    246 ; CHECK64-NEXT:    testb %bl, %bl
    247 ; CHECK64-NEXT:    jne .LBB4_2
    248 ; CHECK64-NEXT:  # %bb.1: # %t
    249 ; CHECK64-NEXT:    movl $42, %eax
    250 ; CHECK64-NEXT:    popq %rbx
    251 ; CHECK64-NEXT:    retq
    252 ; CHECK64-NEXT:  .LBB4_2: # %f
    253 ; CHECK64-NEXT:    xorl %eax, %eax
    254 ; CHECK64-NEXT:    popq %rbx
    255 ; CHECK64-NEXT:    retq
        ; Because EFLAGS cannot survive the call, the CHECK lines expect the
        ; cmpxchg success bit to be materialized into %bl via `setne` before
        ; `call bar`, then re-tested with `testb %bl, %bl` afterwards.
    256 entry:
    257   ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS.
    258   %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
    259   %v = extractvalue { i64, i1 } %cx, 0
    260   %p = extractvalue { i64, i1 } %cx, 1
    261   call i32 @bar(i64 %v)
    262   br i1 %p, label %t, label %f
    263 
    264 t:
    265   ret i64 42
    266 
    267 f:
    268   ret i64 0
    269 }
    270 
    271 define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i64 %bar1, i64 %baz1) nounwind {
    272 ; CHECK32-LABEL: test_two_live_flags:
    273 ; CHECK32:       # %bb.0: # %entry
    274 ; CHECK32-NEXT:    pushl %ebp
    275 ; CHECK32-NEXT:    pushl %ebx
    276 ; CHECK32-NEXT:    pushl %edi
    277 ; CHECK32-NEXT:    pushl %esi
    278 ; CHECK32-NEXT:    pushl %eax
    279 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
    280 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
    281 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    282 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
    283 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
    284 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    285 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
    286 ; CHECK32-NEXT:    lock cmpxchg8b (%esi)
    287 ; CHECK32-NEXT:    setne {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
    288 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    289 ; CHECK32-NEXT:    movl %edi, %edx
    290 ; CHECK32-NEXT:    movl %ebp, %ecx
    291 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
    292 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
    293 ; CHECK32-NEXT:    lock cmpxchg8b (%esi)
    294 ; CHECK32-NEXT:    sete %al
    295 ; CHECK32-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
    296 ; CHECK32-NEXT:    jne .LBB5_4
    297 ; CHECK32-NEXT:  # %bb.1: # %entry
    298 ; CHECK32-NEXT:    testb %al, %al
    299 ; CHECK32-NEXT:    je .LBB5_4
    300 ; CHECK32-NEXT:  # %bb.2: # %t
    301 ; CHECK32-NEXT:    movl $42, %eax
    302 ; CHECK32-NEXT:    jmp .LBB5_3
    303 ; CHECK32-NEXT:  .LBB5_4: # %f
    304 ; CHECK32-NEXT:    xorl %eax, %eax
    305 ; CHECK32-NEXT:  .LBB5_3: # %t
    306 ; CHECK32-NEXT:    xorl %edx, %edx
    307 ; CHECK32-NEXT:    addl $4, %esp
    308 ; CHECK32-NEXT:    popl %esi
    309 ; CHECK32-NEXT:    popl %edi
    310 ; CHECK32-NEXT:    popl %ebx
    311 ; CHECK32-NEXT:    popl %ebp
    312 ; CHECK32-NEXT:    retl
    313 ;
    314 ; CHECK64-LABEL: test_two_live_flags:
    315 ; CHECK64:       # %bb.0: # %entry
    316 ; CHECK64-NEXT:    movq %rsi, %rax
    317 ; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi)
    318 ; CHECK64-NEXT:    setne %dl
    319 ; CHECK64-NEXT:    movq %r8, %rax
    320 ; CHECK64-NEXT:    lock cmpxchgq %r9, (%rcx)
    321 ; CHECK64-NEXT:    sete %al
    322 ; CHECK64-NEXT:    testb %dl, %dl
    323 ; CHECK64-NEXT:    jne .LBB5_3
    324 ; CHECK64-NEXT:  # %bb.1: # %entry
    325 ; CHECK64-NEXT:    testb %al, %al
    326 ; CHECK64-NEXT:    je .LBB5_3
    327 ; CHECK64-NEXT:  # %bb.2: # %t
    328 ; CHECK64-NEXT:    movl $42, %eax
    329 ; CHECK64-NEXT:    retq
    330 ; CHECK64-NEXT:  .LBB5_3: # %f
    331 ; CHECK64-NEXT:    xorl %eax, %eax
    332 ; CHECK64-NEXT:    retq
        ; Two cmpxchg success bits are live at once, so per the CHECK lines
        ; the first is materialized with setne (spilled to a stack byte on
        ; 32-bit, kept in %dl on 64-bit) while the second cmpxchg runs, then
        ; both are re-tested with cmpb/testb.
    333 entry:
    334   %cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst
    335   %p0 = extractvalue { i64, i1 } %cx0, 1
    336   %cx1 = cmpxchg i64* %foo1, i64 %bar1, i64 %baz1 seq_cst seq_cst
    337   %p1 = extractvalue { i64, i1 } %cx1, 1
    338   %flag = and i1 %p0, %p1
    339   br i1 %flag, label %t, label %f
    340 
    341 t:
    342   ret i64 42
    343 
    344 f:
    345   ret i64 0
    346 }
    347 
    348 define i1 @asm_clobbering_flags(i32* %mem) nounwind {
    349 ; CHECK32-LABEL: asm_clobbering_flags:
    350 ; CHECK32:       # %bb.0: # %entry
    351 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    352 ; CHECK32-NEXT:    movl (%ecx), %edx
    353 ; CHECK32-NEXT:    testl %edx, %edx
    354 ; CHECK32-NEXT:    setg %al
    355 ; CHECK32-NEXT:    #APP
    356 ; CHECK32-NEXT:    bsfl %edx, %edx
    357 ; CHECK32-NEXT:    #NO_APP
    358 ; CHECK32-NEXT:    movl %edx, (%ecx)
    359 ; CHECK32-NEXT:    retl
    360 ;
    361 ; CHECK64-LABEL: asm_clobbering_flags:
    362 ; CHECK64:       # %bb.0: # %entry
    363 ; CHECK64-NEXT:    movl (%rdi), %ecx
    364 ; CHECK64-NEXT:    testl %ecx, %ecx
    365 ; CHECK64-NEXT:    setg %al
    366 ; CHECK64-NEXT:    #APP
    367 ; CHECK64-NEXT:    bsfl %ecx, %ecx
    368 ; CHECK64-NEXT:    #NO_APP
    369 ; CHECK64-NEXT:    movl %ecx, (%rdi)
    370 ; CHECK64-NEXT:    retq
        ; The inline asm declares ~{cc}, so EFLAGS cannot carry the icmp result
        ; across it; the CHECK lines expect `setg %al` to materialize the
        ; compare result before the #APP/#NO_APP block.
    371 entry:
    372   %val = load i32, i32* %mem, align 4
    373   %cmp = icmp sgt i32 %val, 0
    374   %res = tail call i32 asm "bsfl $1,$0", "=r,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %val)
    375   store i32 %res, i32* %mem, align 4
    376   ret i1 %cmp
    377 }
    378