; Home | History | Annotate | Download | only in X86  (code-browser header, kept as an IR comment)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
      4 
; Fuzzer-reduced regression test (name suggests it exercises known-bits
; handling of a value zero-extended in a register — see the movzbl %al, %eax
; in the checks). The i8 udiv-by-constant is lowered to the multiply/shift
; sequence (imull $101 / shrl $14) shown below; the resulting byte is
; broadcast into lane 1 of a <4 x i8> vector whose self-srem becomes four
; scalar divl instructions. The CHECK lines are autogenerated — do not edit
; them by hand; rerun utils/update_llc_test_checks.py instead.
define void @knownbits_zext_in_reg(i8*) nounwind {
; X32-LABEL: knownbits_zext_in_reg:
; X32:       # %bb.0: # %BB
; X32-NEXT:    pushl %ebp
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzbl (%eax), %eax
; X32-NEXT:    imull $101, %eax, %eax
; X32-NEXT:    shrl $14, %eax
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    vmovd %eax, %xmm0
; X32-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    vpextrd $1, %xmm0, %ebp
; X32-NEXT:    xorl %ecx, %ecx
; X32-NEXT:    vmovd %xmm0, %esi
; X32-NEXT:    vpextrd $2, %xmm0, %edi
; X32-NEXT:    vpextrd $3, %xmm0, %ebx
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB0_1: # %CF
; X32-NEXT:    # =>This Loop Header: Depth=1
; X32-NEXT:    # Child Loop BB0_2 Depth 2
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    movl %ebp, %eax
; X32-NEXT:    divl %ebp
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    movl %esi, %eax
; X32-NEXT:    divl %esi
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    movl %edi, %eax
; X32-NEXT:    divl %edi
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    divl %ebx
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB0_2: # %CF237
; X32-NEXT:    # Parent Loop BB0_1 Depth=1
; X32-NEXT:    # => This Inner Loop Header: Depth=2
; X32-NEXT:    testb %cl, %cl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:    jmp .LBB0_1
;
; X64-LABEL: knownbits_zext_in_reg:
; X64:       # %bb.0: # %BB
; X64-NEXT:    movzbl (%rdi), %eax
; X64-NEXT:    imull $101, %eax, %eax
; X64-NEXT:    shrl $14, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    vpextrd $1, %xmm0, %r8d
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    vmovd %xmm0, %r9d
; X64-NEXT:    vpextrd $2, %xmm0, %edi
; X64-NEXT:    vpextrd $3, %xmm0, %ecx
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB0_1: # %CF
; X64-NEXT:    # =>This Loop Header: Depth=1
; X64-NEXT:    # Child Loop BB0_2 Depth 2
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    divl %r8d
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %r9d, %eax
; X64-NEXT:    divl %r9d
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    divl %edi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    divl %ecx
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB0_2: # %CF237
; X64-NEXT:    # Parent Loop BB0_1 Depth=1
; X64-NEXT:    # => This Inner Loop Header: Depth=2
; X64-NEXT:    testb %sil, %sil
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:    jmp .LBB0_1
BB:
  %L5 = load i8, i8* %0
  ; The select condition is the constant true, so %Sl9 is always %L5.
  %Sl9 = select i1 true, i8 %L5, i8 undef
  ; Unsigned i8 divide by 163 (-93 interpreted as u8); lowered to the
  ; imull $101 / shrl $14 magic-number sequence in the checks above.
  %B21 = udiv i8 %Sl9, -93
  br label %CF

CF:                                               ; preds = %CF246, %BB
  ; %B21 lands in lane 1 of an otherwise-zero <4 x i8>; the self-srem is
  ; what produces the four scalar divl instructions per outer iteration.
  %I40 = insertelement <4 x i8> zeroinitializer, i8 %B21, i32 1
  %B41 = srem <4 x i8> %I40, %I40
  br label %CF237

CF237:                                            ; preds = %CF237, %CF
  ; Branch on an undef comparison: inner loop either spins or falls through.
  %Cmp73 = icmp ne i1 undef, undef
  br i1 %Cmp73, label %CF237, label %CF246

CF246:                                            ; preds = %CF237
  ; Result is never used outside the loop; the outer loop is infinite
  ; (matches the unconditional jmp .LBB0_1 in the checks).
  %Cmp117 = icmp ult <4 x i8> %B41, undef
  %E156 = extractelement <4 x i1> %Cmp117, i32 2
  br label %CF
}
    104 
; (%a0 & 32767) + (%a1 & 32766) is at most 32767 + 32766 = 65533 < 2^17, so
; known-bits analysis proves the lshr-by-17 result is always zero; both
; targets must fold the whole function to "return 0" (xorl %eax, %eax).
define i32 @knownbits_mask_add_lshr(i32 %a0, i32 %a1) nounwind {
; X32-LABEL: knownbits_mask_add_lshr:
; X32:       # %bb.0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_lshr:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = and i32 %a0, 32767
  %2 = and i32 %a1, 32766
  %3 = add i32 %1, %2
  %4 = lshr i32 %3, 17
  ret i32 %4
}
    121 
; Both i64 addends are masked with -1024, so their low 10 bits are known
; zero (and so are the low 10 bits of the i128 sum's low half). After the
; shl by 54, those known-zero bits fill the low 64-bit half of the result —
; both targets store/return literal zeros for it and only compute the high
; half (note the shldq $54 / shldl $22 pulling the surviving bits up).
define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
; X32-LABEL: knownbits_mask_addc_shl:
; X32:       # %bb.0:
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl $-1024, %esi # imm = 0xFC00
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    andl %esi, %edi
; X32-NEXT:    andl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    addl %edi, %esi
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    adcl $0, %ecx
; X32-NEXT:    shldl $22, %edx, %ecx
; X32-NEXT:    shldl $22, %esi, %edx
; X32-NEXT:    movl %edx, 8(%eax)
; X32-NEXT:    movl %ecx, 12(%eax)
; X32-NEXT:    movl $0, 4(%eax)
; X32-NEXT:    movl $0, (%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    retl $4
;
; X64-LABEL: knownbits_mask_addc_shl:
; X64:       # %bb.0:
; X64-NEXT:    andq $-1024, %rdi # imm = 0xFC00
; X64-NEXT:    andq $-1024, %rsi # imm = 0xFC00
; X64-NEXT:    addq %rdi, %rsi
; X64-NEXT:    adcl $0, %edx
; X64-NEXT:    shldq $54, %rsi, %rdx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = and i64 %a0, -1024             ; clear low 10 bits of %a0
  %2 = zext i64 %1 to i128
  %3 = and i64 %a1, -1024             ; clear low 10 bits of %a1
  %4 = zext i64 %3 to i128
  %5 = add i128 %2, %4
  %6 = zext i64 %a2 to i128
  %7 = shl i128 %6, 64                ; place %a2 in the high 64 bits
  %8 = add i128 %5, %7                ; add-with-carry into the high half
  %9 = shl i128 %8, 54
  ret i128 %9
}
    167 
; Both operands are shifted left by 32, so the low 32 bits of the uadd/sadd
; results (and of their sum) are known zero: the truncated i32 result is
; folded to 0 (xorl %eax, %eax on both targets) and only the combined
; overflow flag is actually computed. On X64 a single addq sets both CF
; (unsigned overflow, setb) and OF (signed overflow, seto).
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_uaddo_saddo:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %ecx, %edx
; X32-NEXT:    addl %eax, %edx
; X32-NEXT:    setb %bl
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    setns %al
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    setns %cl
; X32-NEXT:    cmpb %al, %cl
; X32-NEXT:    sete %al
; X32-NEXT:    testl %edx, %edx
; X32-NEXT:    setns %dl
; X32-NEXT:    cmpb %dl, %cl
; X32-NEXT:    setne %dl
; X32-NEXT:    andb %al, %dl
; X32-NEXT:    orb %bl, %dl
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_uaddo_saddo:
; X64:       # %bb.0:
; X64-NEXT:    shlq $32, %rdi
; X64-NEXT:    shlq $32, %rsi
; X64-NEXT:    addq %rdi, %rsi
; X64-NEXT:    setb %al
; X64-NEXT:    seto %dl
; X64-NEXT:    orb %al, %dl
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = shl i64 %a0, 32                ; low 32 bits known zero
  %2 = shl i64 %a1, 32                ; low 32 bits known zero
  %u = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32          ; known zero -> folded away
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}
    218 
; Subtraction twin of knownbits_uaddo_saddo: the shl-by-32 operands make the
; low 32 bits of the usub/ssub results (and their sum) known zero, so the
; i32 value is folded to 0 and only the combined overflow bit survives. On
; X64 the subtraction itself is dead, so a cmpq suffices to set CF (setb)
; and OF (seto).
define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_usubo_ssubo:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %ecx, %edx
; X32-NEXT:    subl %eax, %edx
; X32-NEXT:    setb %bl
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    setns %al
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    setns %cl
; X32-NEXT:    cmpb %al, %cl
; X32-NEXT:    setne %al
; X32-NEXT:    testl %edx, %edx
; X32-NEXT:    setns %dl
; X32-NEXT:    cmpb %dl, %cl
; X32-NEXT:    setne %dl
; X32-NEXT:    andb %al, %dl
; X32-NEXT:    orb %bl, %dl
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_usubo_ssubo:
; X64:       # %bb.0:
; X64-NEXT:    shlq $32, %rdi
; X64-NEXT:    shlq $32, %rsi
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    setb %al
; X64-NEXT:    seto %dl
; X64-NEXT:    orb %al, %dl
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = shl i64 %a0, 32                ; low 32 bits known zero
  %2 = shl i64 %a1, 32                ; low 32 bits known zero
  %u = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32          ; known zero -> folded away
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}
    269 
    270 declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
    271 declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
    272 declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
    273 declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
    274