; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64

; Every function in this file takes a urem whose divisor is built from a
; power of 2. The same IR is compiled for a 32-bit triple (i686 with SSE2
; enabled) and a 64-bit triple, and each output is matched against its own
; check prefix.

; The easy case: a constant power-of-2 divisor.
; The expected assembly masks the low 32 bits with 31 (32 - 1) and contains no
; divide. The 32-bit output additionally clears %edx.

define i64 @const_pow_2(i64 %x) {
; X86-LABEL: const_pow_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $31, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: const_pow_2:
; X64:       # %bb.0:
; X64-NEXT:    andl $31, %edi
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %urem = urem i64 %x, 32
  ret i64 %urem
}

; A left-shifted power-of-2 divisor. Use a weird type for wider coverage.
; The expected code computes (1 << y) + 0x1FFFFFF, which equals (1 << y) - 1
; modulo 2^25, and then ANDs that mask with x.

define i25 @shift_left_pow_2(i25 %x, i25 %y) {
; X86-LABEL: shift_left_pow_2:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    addl $33554431, %eax # imm = 0x1FFFFFF
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: shift_left_pow_2:
; X64:       # %bb.0:
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    addl $33554431, %eax # imm = 0x1FFFFFF
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %shl = shl i25 1, %y
  %urem = urem i25 %x, %shl
  ret i25 %urem
}

; A logically right-shifted sign bit is a power-of-2 or UB.
; The divisor here is the i16 sign bit (0x8000) logically shifted right by y.
; The expected code forms (0x8000 >> y) - 1 with a shift and a decrement, then
; ANDs that mask with x; no divide instruction appears.

define i16 @shift_right_pow_2(i16 %x, i16 %y) {
; X86-LABEL: shift_right_pow_2:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl $32768, %eax # imm = 0x8000
; X86-NEXT:    shrl %cl, %eax
; X86-NEXT:    decl %eax
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: shift_right_pow_2:
; X64:       # %bb.0:
; X64-NEXT:    movl $32768, %eax # imm = 0x8000
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    shrl %cl, %eax
; X64-NEXT:    decl %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %shr = lshr i16 -32768, %y
  %urem = urem i16 %x, %shr
  ret i16 %urem
}

; FIXME: A zero divisor would be UB, so this could be reduced to an 'and' with 3.
; (%y & 4) can only be 0 or 4, so the only defined divisor is 4. The checks
; below record the current output, which still issues a divb and reads the
; remainder from %ah.

define i8 @and_pow_2(i8 %x, i8 %y) {
; X86-LABEL: and_pow_2:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    andb $4, %cl
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $eax killed $eax def $ax
; X86-NEXT:    divb %cl
; X86-NEXT:    movzbl %ah, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: and_pow_2:
; X64:       # %bb.0:
; X64-NEXT:    andb $4, %sil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    # kill: def $eax killed $eax def $ax
; X64-NEXT:    divb %sil
; X64-NEXT:    movzbl %ah, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %and = and i8 %y, 4
  %urem = urem i8 %x, %and
  ret i8 %urem
}

; A vector constant divisor should get the same treatment as a scalar.
; Splat divisor: every lane is 16. The expected output is a single andps of
; %xmm0 with a memory operand (matched by regex) and no per-lane division.

define <4 x i32> @vec_const_uniform_pow_2(<4 x i32> %x) {
; X86-LABEL: vec_const_uniform_pow_2:
; X86:       # %bb.0:
; X86-NEXT:    andps {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_uniform_pow_2:
; X64:       # %bb.0:
; X64-NEXT:    andps {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %urem = urem <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %urem
}

; Per-lane divisors 2, 4, 8 and 16: each lane is a different power of 2.
; The expected output is again a single andps of %xmm0 with a memory operand.

define <4 x i32> @vec_const_nonuniform_pow_2(<4 x i32> %x) {
; X86-LABEL: vec_const_nonuniform_pow_2:
; X86:       # %bb.0:
; X86-NEXT:    andps {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_nonuniform_pow_2:
; X64:       # %bb.0:
; X64-NEXT:    andps {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %urem = urem <4 x i32> %x, <i32 2, i32 4, i32 8, i32 16>
  ret <4 x i32> %urem
}