; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64
;
; Test patterns that require preserving and restoring flags.
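;
; Rather than spilling EFLAGS with pushf/popf (which is slow and awkward to
; use in generated code), the backend is expected to materialize each needed
; condition into a byte register with setcc and recreate the flags at the use
; with test/cmp. The functions below stress different corners of that rewrite.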

@b = common global i8 0, align 1
@c = common global i32 0, align 4
@a = common global i8 0, align 1
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
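; In the expected output, the ZF of the incl is captured with sete into a
; scratch byte register before the later incb clobbers EFLAGS, and the branch
; condition is recreated with testb instead of saving EFLAGS itself.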
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movb b, %cl
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    incb %al
; X32-NEXT:    movb %al, b
; X32-NEXT:    incl c
; X32-NEXT:    sete %dl
; X32-NEXT:    movb a, %ah
; X32-NEXT:    movb %ah, %ch
; X32-NEXT:    incb %ch
; X32-NEXT:    cmpb %cl, %ah
; X32-NEXT:    sete d
; X32-NEXT:    movb %ch, a
; X32-NEXT:    testb %dl, %dl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:  # %bb.1: # %if.then
; X32-NEXT:    movsbl %al, %eax
; X32-NEXT:    pushl %eax
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:  .LBB0_2: # %if.end
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movb {{.*}}(%rip), %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    incb %al
; X64-NEXT:    movb %al, {{.*}}(%rip)
; X64-NEXT:    incl {{.*}}(%rip)
; X64-NEXT:    sete %sil
; X64-NEXT:    movb {{.*}}(%rip), %cl
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    incb %dl
; X64-NEXT:    cmpb %dil, %cl
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    movb %dl, {{.*}}(%rip)
; X64-NEXT:    testb %sil, %sil
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:  # %bb.1: # %if.then
; X64-NEXT:    pushq %rax
; X64-NEXT:    movsbl %al, %edi
; X64-NEXT:    callq external
; X64-NEXT:    addq $8, %rsp
; X64-NEXT:  .LBB0_2: # %if.end
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
entry:
  %bval = load i8, i8* @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, i8* @b
  %cval = load volatile i32, i32* @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, i32* @c
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, i8* @d
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
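; The ZF from the increment cannot survive the call, so it is captured into
; the callee-saved %bl with setne before calling external and recreated with
; testb once the call returns.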
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %bl
; X32-NEXT:    pushl $42
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:    testb %bl, %bl
; X32-NEXT:    je .LBB1_1
; X32-NEXT:  # %bb.2: # %else
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
; X32-NEXT:  .LBB1_1: # %then
; X32-NEXT:    movl $64, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbx
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %bl
; X64-NEXT:    movl $42, %edi
; X64-NEXT:    callq external
; X64-NEXT:    testb %bl, %bl
; X64-NEXT:    je .LBB1_1
; X64-NEXT:  # %bb.2: # %else
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_1: # %then
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare void @external_a()
declare void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
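; Here the recreated condition is consumed directly by a conditional tail
; call ("jne external_b # TAILCALL") rather than by an ordinary conditional
; branch inside the function.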
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %al
; X32-NEXT:    incb a
; X32-NEXT:    sete d
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne external_b # TAILCALL
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64:       # %bb.0: # %entry
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %al
; X64-NEXT:    incb {{.*}}(%rip)
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    testb %al, %al
; X64-NEXT:    jne external_b # TAILCALL
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    jmp external_a # TAILCALL
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, i8* @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function whose selects get special lowering into a CFG, with the
; copied EFLAGS threaded across it. This requires our EFLAGS copy rewriting
; to handle cross-block rewrites in at least some narrow cases.
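; In the x86-64 output this shows up as the comparison simply being repeated
; (two "cmpq %rax, %r10" instructions) so that each consuming block gets a
; fresh flags definition instead of a physical copy of EFLAGS.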
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
; X32-LABEL: PR37100:
; X32:       # %bb.0: # %bb
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    pushl %edi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    .cfi_offset %esi, -20
; X32-NEXT:    .cfi_offset %edi, -16
; X32-NEXT:    .cfi_offset %ebx, -12
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    jmp .LBB3_1
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB3_5: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ebp
; X32-NEXT:  .LBB3_1: # %bb1
; X32-NEXT:    # =>This Inner Loop Header: Depth=1
; X32-NEXT:    movsbl %cl, %eax
; X32-NEXT:    movl %eax, %edx
; X32-NEXT:    sarl $31, %edx
; X32-NEXT:    cmpl %eax, %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    sbbl %edx, %eax
; X32-NEXT:    setl %al
; X32-NEXT:    setl %dl
; X32-NEXT:    movzbl %dl, %ebp
; X32-NEXT:    negl %ebp
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_3
; X32-NEXT:  # %bb.2: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %ch, %cl
; X32-NEXT:  .LBB3_3: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %cl, (%ebx)
; X32-NEXT:    movl (%edi), %edx
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_5
; X32-NEXT:  # %bb.4: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %edx, %ebp
; X32-NEXT:    jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq %rdx, %r10
; X64-NEXT:    jmp .LBB3_1
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_5: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:  .LBB3_1: # %bb1
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    setl %sil
; X64-NEXT:    negl %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    jl .LBB3_3
; X64-NEXT:  # %bb.2: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl %ecx, %edi
; X64-NEXT:  .LBB3_3: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movb %dil, (%r8)
; X64-NEXT:    jl .LBB3_5
; X64-NEXT:  # %bb.4: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl (%r9), %esi
; X64-NEXT:    jmp .LBB3_5
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 0, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern that lowers to the post-RA pseudo
; turning SETB into an SBB, to make sure that kind of usage of a copied
; EFLAGS continues to work.
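; The "sbbb %dl, %dl" in the output is that pattern: subtracting a register
; from itself with borrow yields 0 or -1 depending only on CF, materializing
; the carry without a separate setb.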
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
; X32-LABEL: PR37431:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %esi, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    movl %eax, %ecx
; X32-NEXT:    sarl $31, %ecx
; X32-NEXT:    cmpl %eax, %eax
; X32-NEXT:    sbbl %ecx, %eax
; X32-NEXT:    setb %al
; X32-NEXT:    sbbb %cl, %cl
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movb %cl, (%edx)
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    xorl %ecx, %ecx
; X32-NEXT:    subl %eax, %ecx
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ecx
; X32-NEXT:    movb %dl, (%esi)
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl
;
; X64-LABEL: PR37431:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movslq (%rdi), %rax
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    sbbb %dl, %dl
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    movb %dl, (%rsi)
; X64-NEXT:    sbbl %esi, %esi
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:    movb %dl, (%rcx)
; X64-NEXT:    retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, undef
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 0, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}
    366