; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Regression test for PR15215: bitcast of <4 x i1> to i4 (the "bad" pattern)
; versus per-element extraction (the "good" pattern). Each RUN line covers one
; (triple, SIMD-feature) combination with its own check prefix.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2

; Truncate each lane to i1, bitcast the <4 x i1> to i4, zero-extend to i32.
; With SSE2/AVX2 this lowers to a sign-bit broadcast + movmskps.
define i32 @PR15215_bad(<4 x i32> %input) {
; X32-LABEL: PR15215_bad:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
; X32-NEXT:    movb {{[0-9]+}}(%esp), %ah
; X32-NEXT:    addb %ah, %ah
; X32-NEXT:    andb $1, %dl
; X32-NEXT:    orb %ah, %dl
; X32-NEXT:    shlb $2, %dl
; X32-NEXT:    addb %cl, %cl
; X32-NEXT:    andb $1, %al
; X32-NEXT:    orb %cl, %al
; X32-NEXT:    andb $3, %al
; X32-NEXT:    orb %dl, %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    andl $15, %eax
; X32-NEXT:    retl
;
; X32-SSE2-LABEL: PR15215_bad:
; X32-SSE2:       # %bb.0: # %entry
; X32-SSE2-NEXT:    pslld $31, %xmm0
; X32-SSE2-NEXT:    psrad $31, %xmm0
; X32-SSE2-NEXT:    movmskps %xmm0, %eax
; X32-SSE2-NEXT:    retl
;
; X32-AVX2-LABEL: PR15215_bad:
; X32-AVX2:       # %bb.0: # %entry
; X32-AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; X32-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; X32-AVX2-NEXT:    vmovmskps %xmm0, %eax
; X32-AVX2-NEXT:    retl
;
; X64-LABEL: PR15215_bad:
; X64:       # %bb.0: # %entry
; X64-NEXT:    addb %cl, %cl
; X64-NEXT:    andb $1, %dl
; X64-NEXT:    orb %cl, %dl
; X64-NEXT:    shlb $2, %dl
; X64-NEXT:    addb %sil, %sil
; X64-NEXT:    andb $1, %dil
; X64-NEXT:    orb %sil, %dil
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    orb %dl, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    retq
;
; X64-SSE2-LABEL: PR15215_bad:
; X64-SSE2:       # %bb.0: # %entry
; X64-SSE2-NEXT:    pslld $31, %xmm0
; X64-SSE2-NEXT:    psrad $31, %xmm0
; X64-SSE2-NEXT:    movmskps %xmm0, %eax
; X64-SSE2-NEXT:    retq
;
; X64-AVX2-LABEL: PR15215_bad:
; X64-AVX2:       # %bb.0: # %entry
; X64-AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovmskps %xmm0, %eax
; X64-AVX2-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %input to <4 x i1>
  %1 = bitcast <4 x i1> %0 to i4
  %2 = zext i4 %1 to i32
  ret i32 %2
}

; Same mask computed by extracting each i1 lane individually and OR-ing
; selected powers of two; lowers to per-lane extract + and + lea chains.
define i32 @PR15215_good(<4 x i32> %input) {
; X32-LABEL: PR15215_good:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %esi, -8
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    andl $1, %eax
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    andl $1, %ecx
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    andl $1, %edx
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    andl $1, %esi
; X32-NEXT:    leal (%eax,%ecx,2), %eax
; X32-NEXT:    leal (%eax,%edx,4), %eax
; X32-NEXT:    leal (%eax,%esi,8), %eax
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl
;
; X32-SSE2-LABEL: PR15215_good:
; X32-SSE2:       # %bb.0: # %entry
; X32-SSE2-NEXT:    pushl %esi
; X32-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X32-SSE2-NEXT:    .cfi_offset %esi, -8
; X32-SSE2-NEXT:    movd %xmm0, %eax
; X32-SSE2-NEXT:    andl $1, %eax
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X32-SSE2-NEXT:    movd %xmm1, %ecx
; X32-SSE2-NEXT:    andl $1, %ecx
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X32-SSE2-NEXT:    movd %xmm1, %edx
; X32-SSE2-NEXT:    andl $1, %edx
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X32-SSE2-NEXT:    movd %xmm0, %esi
; X32-SSE2-NEXT:    andl $1, %esi
; X32-SSE2-NEXT:    leal (%eax,%ecx,2), %eax
; X32-SSE2-NEXT:    leal (%eax,%edx,4), %eax
; X32-SSE2-NEXT:    leal (%eax,%esi,8), %eax
; X32-SSE2-NEXT:    popl %esi
; X32-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X32-SSE2-NEXT:    retl
;
; X32-AVX2-LABEL: PR15215_good:
; X32-AVX2:       # %bb.0: # %entry
; X32-AVX2-NEXT:    pushl %esi
; X32-AVX2-NEXT:    .cfi_def_cfa_offset 8
; X32-AVX2-NEXT:    .cfi_offset %esi, -8
; X32-AVX2-NEXT:    vmovd %xmm0, %eax
; X32-AVX2-NEXT:    andl $1, %eax
; X32-AVX2-NEXT:    vpextrd $1, %xmm0, %ecx
; X32-AVX2-NEXT:    andl $1, %ecx
; X32-AVX2-NEXT:    vpextrd $2, %xmm0, %edx
; X32-AVX2-NEXT:    andl $1, %edx
; X32-AVX2-NEXT:    vpextrd $3, %xmm0, %esi
; X32-AVX2-NEXT:    andl $1, %esi
; X32-AVX2-NEXT:    leal (%eax,%ecx,2), %eax
; X32-AVX2-NEXT:    leal (%eax,%edx,4), %eax
; X32-AVX2-NEXT:    leal (%eax,%esi,8), %eax
; X32-AVX2-NEXT:    popl %esi
; X32-AVX2-NEXT:    .cfi_def_cfa_offset 4
; X32-AVX2-NEXT:    retl
;
; X64-LABEL: PR15215_good:
; X64:       # %bb.0: # %entry
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    # kill: def $edx killed $edx def $rdx
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    andl $1, %edi
; X64-NEXT:    andl $1, %esi
; X64-NEXT:    andl $1, %edx
; X64-NEXT:    andl $1, %ecx
; X64-NEXT:    leal (%rdi,%rsi,2), %eax
; X64-NEXT:    leal (%rax,%rdx,4), %eax
; X64-NEXT:    leal (%rax,%rcx,8), %eax
; X64-NEXT:    retq
;
; X64-SSE2-LABEL: PR15215_good:
; X64-SSE2:       # %bb.0: # %entry
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    andl $1, %eax
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-SSE2-NEXT:    movd %xmm1, %ecx
; X64-SSE2-NEXT:    andl $1, %ecx
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movd %xmm1, %edx
; X64-SSE2-NEXT:    andl $1, %edx
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X64-SSE2-NEXT:    movd %xmm0, %esi
; X64-SSE2-NEXT:    andl $1, %esi
; X64-SSE2-NEXT:    leal (%rax,%rcx,2), %eax
; X64-SSE2-NEXT:    leal (%rax,%rdx,4), %eax
; X64-SSE2-NEXT:    leal (%rax,%rsi,8), %eax
; X64-SSE2-NEXT:    retq
;
; X64-AVX2-LABEL: PR15215_good:
; X64-AVX2:       # %bb.0: # %entry
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    andl $1, %eax
; X64-AVX2-NEXT:    vpextrd $1, %xmm0, %ecx
; X64-AVX2-NEXT:    andl $1, %ecx
; X64-AVX2-NEXT:    vpextrd $2, %xmm0, %edx
; X64-AVX2-NEXT:    andl $1, %edx
; X64-AVX2-NEXT:    vpextrd $3, %xmm0, %esi
; X64-AVX2-NEXT:    andl $1, %esi
; X64-AVX2-NEXT:    leal (%rax,%rcx,2), %eax
; X64-AVX2-NEXT:    leal (%rax,%rdx,4), %eax
; X64-AVX2-NEXT:    leal (%rax,%rsi,8), %eax
; X64-AVX2-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %input to <4 x i1>
  %1 = extractelement <4 x i1> %0, i32 0
  %e1 = select i1 %1, i32 1, i32 0
  %2 = extractelement <4 x i1> %0, i32 1
  %e2 = select i1 %2, i32 2, i32 0
  %3 = extractelement <4 x i1> %0, i32 2
  %e3 = select i1 %3, i32 4, i32 0
  %4 = extractelement <4 x i1> %0, i32 3
  %e4 = select i1 %4, i32 8, i32 0
  %5 = or i32 %e1, %e2
  %6 = or i32 %5, %e3
  %7 = or i32 %6, %e4
  ret i32 %7
}