Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
      4 
      5 ; The easy case: a constant power-of-2 divisor.
      6 
      7 define i64 @const_pow_2(i64 %x) {
      8 ; X86-LABEL: const_pow_2:
      9 ; X86:       # %bb.0:
     10 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     11 ; X86-NEXT:    andl $31, %eax
     12 ; X86-NEXT:    xorl %edx, %edx
     13 ; X86-NEXT:    retl
     14 ;
     15 ; X64-LABEL: const_pow_2:
     16 ; X64:       # %bb.0:
     17 ; X64-NEXT:    andl $31, %edi
     18 ; X64-NEXT:    movq %rdi, %rax
     19 ; X64-NEXT:    retq
        ; x % 32 -> x & 31: urem by a constant power of two needs no divide.
        ; On i686 the high half of the i64 result is known zero, so %edx is
        ; simply xor'd to zero.
     20   %urem = urem i64 %x, 32
     21   ret i64 %urem
     22 }
     23 
     24 ; A left-shifted power-of-2 divisor. Use a weird type for wider coverage.
     25 
     26 define i25 @shift_left_pow_2(i25 %x, i25 %y) {
     27 ; X86-LABEL: shift_left_pow_2:
     28 ; X86:       # %bb.0:
     29 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
     30 ; X86-NEXT:    movl $1, %eax
     31 ; X86-NEXT:    shll %cl, %eax
     32 ; X86-NEXT:    addl $33554431, %eax # imm = 0x1FFFFFF
     33 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
     34 ; X86-NEXT:    retl
     35 ;
     36 ; X64-LABEL: shift_left_pow_2:
     37 ; X64:       # %bb.0:
     38 ; X64-NEXT:    movl $1, %eax
     39 ; X64-NEXT:    movl %esi, %ecx
     40 ; X64-NEXT:    shll %cl, %eax
     41 ; X64-NEXT:    addl $33554431, %eax # imm = 0x1FFFFFF
     42 ; X64-NEXT:    andl %edi, %eax
     43 ; X64-NEXT:    retq
        ; x % (1 << y) -> x & ((1 << y) - 1). The decrement is emitted as an
        ; add of 0x1FFFFFF, i.e. -1 modulo 2^25 for the illegal i25 type.
     44   %shl = shl i25 1, %y
     45   %urem = urem i25 %x, %shl
     46   ret i25 %urem
     47 }
     48 
     49 ; A logically right-shifted sign bit is a power-of-2 or UB.
     50 
     51 define i16 @shift_right_pow_2(i16 %x, i16 %y) {
     52 ; X86-LABEL: shift_right_pow_2:
     53 ; X86:       # %bb.0:
     54 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
     55 ; X86-NEXT:    movl $32768, %eax # imm = 0x8000
     56 ; X86-NEXT:    shrl %cl, %eax
     57 ; X86-NEXT:    decl %eax
     58 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
     59 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
     60 ; X86-NEXT:    retl
     61 ;
     62 ; X64-LABEL: shift_right_pow_2:
     63 ; X64:       # %bb.0:
     64 ; X64-NEXT:    movl $32768, %eax # imm = 0x8000
     65 ; X64-NEXT:    movl %esi, %ecx
     66 ; X64-NEXT:    shrl %cl, %eax
     67 ; X64-NEXT:    decl %eax
     68 ; X64-NEXT:    andl %edi, %eax
     69 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
     70 ; X64-NEXT:    retq
        ; -32768 is 0x8000 (only the sign bit set), so 0x8000 >> y is a
        ; single set bit for any in-range y (an over-wide shift would be
        ; poison). The urem therefore folds to x & ((0x8000 >> y) - 1).
     71   %shr = lshr i16 -32768, %y
     72   %urem = urem i16 %x, %shr
     73   ret i16 %urem
     74 }
     75 
     76 ; FIXME: A zero divisor would be UB, so this could be reduced to an 'and' with 3.
     77 
     78 define i8 @and_pow_2(i8 %x, i8 %y) {
     79 ; X86-LABEL: and_pow_2:
     80 ; X86:       # %bb.0:
     81 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
     82 ; X86-NEXT:    andb $4, %cl
     83 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
     84 ; X86-NEXT:    # kill: def $eax killed $eax def $ax
     85 ; X86-NEXT:    divb %cl
     86 ; X86-NEXT:    movzbl %ah, %eax
     87 ; X86-NEXT:    # kill: def $al killed $al killed $eax
     88 ; X86-NEXT:    retl
     89 ;
     90 ; X64-LABEL: and_pow_2:
     91 ; X64:       # %bb.0:
     92 ; X64-NEXT:    andb $4, %sil
     93 ; X64-NEXT:    movzbl %dil, %eax
     94 ; X64-NEXT:    # kill: def $eax killed $eax def $ax
     95 ; X64-NEXT:    divb %sil
     96 ; X64-NEXT:    movzbl %ah, %eax
     97 ; X64-NEXT:    # kill: def $al killed $al killed $eax
     98 ; X64-NEXT:    retq
        ; The divisor %y & 4 is either 4 or 0; urem by 0 is UB, so this could
        ; fold to x & 3. Codegen currently still emits a real divb (the
        ; remainder lands in %ah) — this is the missed fold the FIXME above
        ; tracks.
     99   %and = and i8 %y, 4
    100   %urem = urem i8 %x, %and
    101   ret i8 %urem
    102 }
    103 
    104 ; A vector constant divisor should get the same treatment as a scalar.
    105 
    106 define <4 x i32> @vec_const_uniform_pow_2(<4 x i32> %x) {
    107 ; X86-LABEL: vec_const_uniform_pow_2:
    108 ; X86:       # %bb.0:
    109 ; X86-NEXT:    andps {{\.LCPI.*}}, %xmm0
    110 ; X86-NEXT:    retl
    111 ;
    112 ; X64-LABEL: vec_const_uniform_pow_2:
    113 ; X64:       # %bb.0:
    114 ; X64-NEXT:    andps {{.*}}(%rip), %xmm0
    115 ; X64-NEXT:    retq
        ; Splat power-of-two divisor: like the scalar case, the whole urem
        ; folds to a single andps against a constant-pool mask (15 per lane).
    116   %urem = urem <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
    117   ret <4 x i32> %urem
    118 }
    119 
    120 define <4 x i32> @vec_const_nonuniform_pow_2(<4 x i32> %x) {
    121 ; X86-LABEL: vec_const_nonuniform_pow_2:
    122 ; X86:       # %bb.0:
    123 ; X86-NEXT:    andps {{\.LCPI.*}}, %xmm0
    124 ; X86-NEXT:    retl
    125 ;
    126 ; X64-LABEL: vec_const_nonuniform_pow_2:
    127 ; X64:       # %bb.0:
    128 ; X64-NEXT:    andps {{.*}}(%rip), %xmm0
    129 ; X64-NEXT:    retq
        ; Non-uniform but all-power-of-two divisors still fold to one andps;
        ; the constant-pool mask simply holds (divisor - 1) per lane.
    130   %urem = urem <4 x i32> %x, <i32 2, i32 4, i32 8, i32 16>
    131   ret <4 x i32> %urem
    132 }
    133