; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; Fuzzer-style reduced case: the udiv by -93 (i.e. 163) is lowered to a
; multiply by 101 plus shift, the quotient is re-zero-extended in register,
; and the vector self-srem expands to one divl per lane inside the loop.
define void @knownbits_zext_in_reg(i8*) nounwind {
; X32-LABEL: knownbits_zext_in_reg:
; X32: # %bb.0: # %BB
; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl (%eax), %eax
; X32-NEXT: imull $101, %eax, %eax
; X32-NEXT: shrl $14, %eax
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: vpextrd $1, %xmm0, %ebp
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: vmovd %xmm0, %esi
; X32-NEXT: vpextrd $2, %xmm0, %edi
; X32-NEXT: vpextrd $3, %xmm0, %ebx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_1: # %CF
; X32-NEXT: # =>This Loop Header: Depth=1
; X32-NEXT: # Child Loop BB0_2 Depth 2
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: divl %ebp
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: movl %esi, %eax
; X32-NEXT: divl %esi
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: movl %edi, %eax
; X32-NEXT: divl %edi
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: divl %ebx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_2: # %CF237
; X32-NEXT: # Parent Loop BB0_1 Depth=1
; X32-NEXT: # => This Inner Loop Header: Depth=2
; X32-NEXT: testb %cl, %cl
; X32-NEXT: jne .LBB0_2
; X32-NEXT: jmp .LBB0_1
;
; X64-LABEL: knownbits_zext_in_reg:
; X64: # %bb.0: # %BB
; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: imull $101, %eax, %eax
; X64-NEXT: shrl $14, %eax
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: vpextrd $1, %xmm0, %r8d
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: vmovd %xmm0, %r9d
; X64-NEXT: vpextrd $2, %xmm0, %edi
; X64-NEXT: vpextrd $3, %xmm0, %ecx
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_1: # %CF
; X64-NEXT: # =>This Loop Header: Depth=1
; X64-NEXT: # Child Loop BB0_2 Depth 2
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: movl %r8d, %eax
; X64-NEXT: divl %r8d
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: movl %r9d, %eax
; X64-NEXT: divl %r9d
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: divl %edi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: divl %ecx
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_2: # %CF237
; X64-NEXT: # Parent Loop BB0_1 Depth=1
; X64-NEXT: # => This Inner Loop Header: Depth=2
; X64-NEXT: testb %sil, %sil
; X64-NEXT: jne .LBB0_2
; X64-NEXT: jmp .LBB0_1
BB:
  %L5 = load i8, i8* %0
  %Sl9 = select i1 true, i8 %L5, i8 undef
  %B21 = udiv i8 %Sl9, -93
  br label %CF

CF:                                               ; preds = %CF246, %BB
  %I40 = insertelement <4 x i8> zeroinitializer, i8 %B21, i32 1
  %B41 = srem <4 x i8> %I40, %I40
  br label %CF237

CF237:                                            ; preds = %CF237, %CF
  %Cmp73 = icmp ne i1 undef, undef
  br i1 %Cmp73, label %CF237, label %CF246

CF246:                                            ; preds = %CF237
  %Cmp117 = icmp ult <4 x i8> %B41, undef
  %E156 = extractelement <4 x i1> %Cmp117, i32 2
  br label %CF
}
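
; The masked operands sum to at most 0x7FFF + 0x7FFE = 0xFFFD, so the lshr by
; 17 is known to produce zero and both targets fold the whole function to
; returning 0.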
define i32 @knownbits_mask_add_lshr(i32 %a0, i32 %a1) nounwind {
; X32-LABEL: knownbits_mask_add_lshr:
; X32: # %bb.0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_add_lshr:
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
  %1 = and i32 %a0, 32767
  %2 = and i32 %a1, 32766
  %3 = add i32 %1, %2
  %4 = lshr i32 %3, 17
  ret i32 %4
}

; Masking both i64 operands with -1024 leaves the low 10 bits of the addition
; known zero, so after the shl by 54 the low 64 bits of the i128 result are
; known zero and only the high half of the shift has to be computed.
define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
; X32-LABEL: knownbits_mask_addc_shl:
; X32: # %bb.0:
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl $-1024, %esi # imm = 0xFC00
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: andl %esi, %edi
; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
; X32-NEXT: addl %edi, %esi
; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: shldl $22, %edx, %ecx
; X32-NEXT: shldl $22, %esi, %edx
; X32-NEXT: movl %edx, 8(%eax)
; X32-NEXT: movl %ecx, 12(%eax)
; X32-NEXT: movl $0, 4(%eax)
; X32-NEXT: movl $0, (%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: retl $4
;
; X64-LABEL: knownbits_mask_addc_shl:
; X64: # %bb.0:
; X64-NEXT: andq $-1024, %rdi # imm = 0xFC00
; X64-NEXT: andq $-1024, %rsi # imm = 0xFC00
; X64-NEXT: addq %rdi, %rsi
; X64-NEXT: adcl $0, %edx
; X64-NEXT: shldq $54, %rsi, %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
  %1 = and i64 %a0, -1024
  %2 = zext i64 %1 to i128
  %3 = and i64 %a1, -1024
  %4 = zext i64 %3 to i128
  %5 = add i128 %2, %4
  %6 = zext i64 %a2 to i128
  %7 = shl i128 %6, 64
  %8 = add i128 %5, %7
  %9 = shl i128 %8, 54
  ret i128 %9
}

; The shl by 32 leaves the low 32 bits of both addition results known zero,
; so the truncated i32 folds to 0 and only the two overflow flags are
; actually computed.
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_uaddo_saddo:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl %eax, %edx
; X32-NEXT: setb %bl
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setns %al
; X32-NEXT: testl %ecx, %ecx
; X32-NEXT: setns %cl
; X32-NEXT: cmpb %al, %cl
; X32-NEXT: sete %al
; X32-NEXT: testl %edx, %edx
; X32-NEXT: setns %dl
; X32-NEXT: cmpb %dl, %cl
; X32-NEXT: setne %dl
; X32-NEXT: andb %al, %dl
; X32-NEXT: orb %bl, %dl
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: knownbits_uaddo_saddo:
; X64: # %bb.0:
; X64-NEXT: shlq $32, %rdi
; X64-NEXT: shlq $32, %rsi
; X64-NEXT: addq %rdi, %rsi
; X64-NEXT: setb %al
; X64-NEXT: seto %dl
; X64-NEXT: orb %al, %dl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}
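
; As above, the truncated i32 result is known zero; on x86-64 the subtraction
; is needed only for its flags, so it is emitted as a cmpq.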
define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_usubo_ssubo:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: subl %eax, %edx
; X32-NEXT: setb %bl
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setns %al
; X32-NEXT: testl %ecx, %ecx
; X32-NEXT: setns %cl
; X32-NEXT: cmpb %al, %cl
; X32-NEXT: setne %al
; X32-NEXT: testl %edx, %edx
; X32-NEXT: setns %dl
; X32-NEXT: cmpb %dl, %cl
; X32-NEXT: setne %dl
; X32-NEXT: andb %al, %dl
; X32-NEXT: orb %bl, %dl
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: knownbits_usubo_ssubo:
; X64: # %bb.0:
; X64-NEXT: shlq $32, %rdi
; X64-NEXT: shlq $32, %rsi
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: setb %al
; X64-NEXT: seto %dl
; X64-NEXT: orb %al, %dl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}

declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone