; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpextrw $0, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpextrw $0, %xmm0, %eax
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X32-LABEL: knownbits_insert_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    movzwl %si, %ecx
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32> %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

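; The next three tests mask lanes 4-7 down to 4 bits before a shuffle selects
; those lanes, so the sign bit of every i16 is known zero and the sext can be
; lowered as a zero extension (vpmovzxwd). In the undef variant one shuffle
; index is undef, leaving that lane's bits unknown, so vpmovsxwd is kept.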
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vxorps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

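; Each of the tests below feeds a masked or clamped value through a binop (and
; usually a shuffle) into a final shift that removes every bit that could
; still be set, so known-bits analysis should reduce each function to a zero
; vector (a single vxorps).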
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

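; Both sources are masked to 17 bits in the lanes the concatenating shuffle
; reads, so every element of the <8 x i32> fits in 31 bits and the uitofp can
; take the signed vcvtdq2ps path.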
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm1, %xmm1
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

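; Shifting each i64 lane right by one clears the top bit of the odd i32
; halves, so after the bitcast the odd-lane shuffle yields known-non-negative
; elements and the uitofp lowers to vcvtdq2ps.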
define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

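; The unsigned min/max clamps below establish known bits in the lanes the
; shuffles keep: vpminud bounds them below 2^31 so uitofp can use vcvtdq2ps,
; and vpmaxud against -1 pins lanes to all-ones so the ashr folds away.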
define <4 x float> @knownbits_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @knownbits_umax_shuffle_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_shuffle_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umax_shuffle_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}

define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

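; Masking out bit 0 means the bit-reversed value has a clear sign bit, so the
; ashr by 31 is known to produce zero.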
define <4 x i32> @knownbits_mask_bitreverse_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_bitreverse_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bitreverse_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 -2, i32 -2>
  %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %1)
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) nounwind readnone

; If we don't know that the input isn't INT_MIN we can't combine to sitofp
define <4 x float> @knownbits_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %a0
  %2 = icmp slt <4 x i32> %a0, zeroinitializer
  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> %a0
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

define <4 x float> @knownbits_or_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_or_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_or_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 1, i32 0, i32 3, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  %3 = sub <4 x i32> zeroinitializer, %2
  %4 = icmp slt <4 x i32> %2, zeroinitializer
  %5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> %2
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

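; In the final two tests both select operands have their shuffled lanes
; bounded (by a mask or a lshr), so the known bits survive the select and the
; uitofp lowers to vcvtdq2ps.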
define <4 x float> @knownbits_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm2, %xmm2
; X64-NEXT:    vandps {{.*}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a2, <i32 65535, i32 -1, i32 255, i32 -1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

define <4 x float> @knownbits_lshr_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vpsrld $5, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $5, %xmm2, %xmm2
; X64-NEXT:    vandps {{.*}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <4 x i32> %a2, <i32 5, i32 1, i32 5, i32 1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}