; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

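; Element 0 is masked down to 4 bits, so its sign bit is known zero and the
; sext needs no explicit sign-extension instruction after the extract.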
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpextrw $0, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpextrw $0, %xmm0, %eax
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

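; Only the low 16 bits of element 0 survive the mask, so the i64 uitofp can
; be lowered as a plain signed conversion.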
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

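; The shuffle keeps only the lanes holding the zero-extended i16 arguments,
; so every element is known non-negative and the uitofp lowers to vcvtdq2ps.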
define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X32-LABEL: knownbits_insert_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    movzwl %si, %ecx
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32> %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

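; The shuffled lanes are masked to 4 bits, so the sext can be lowered as a
; cheaper zero-extension (vpmovzxwd).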
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

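; The known 4-bit lanes survive both shuffles, so the sext is still lowered
; as a zero-extension.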
define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

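; The undef element in the first shuffle leaves lane 3 unknown, so a real
; sign-extension (vpmovsxwd) is still required.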
define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

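; Lanes 2 and 3 are masked to small non-negative values, so the uitofp
; lowers to vcvtdq2ps.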
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

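; Even after the or, lanes 2 and 3 have their upper 16 bits known zero, so
; the uitofp avoids the full unsigned-to-float expansion.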
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

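; Xor'ing the masked lanes with 65535 keeps them within 16 bits, so the
; uitofp lowers to vcvtdq2ps.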
define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vxorps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

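; Lanes 0 and 3 have their low 16 bits cleared, so shl 17 shifts every set
; bit out and the result is known zero.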
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

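; Lanes 0 and 3 are masked to 17 bits; after ashr 15 only 2 low bits remain,
; so lshr 30 is known zero.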
define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

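; The multiply preserves the 16 known-zero low bits of lanes 0 and 3, so
; shl 22 shifts every set bit out.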
define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

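; The truncated lanes 0 and 3 keep their 16 known-zero low bits, so shl 22
; shifts every set bit out.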
define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

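; Both addends are masked to 15 bits in lanes 0 and 3, so the sum fits in
; 16 bits and lshr 17 is known zero.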
define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

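; Lanes 0 and 3 are masked to 4 bits, so 255 minus them fits in 8 bits and
; lshr 22 is known zero.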
define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

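; udiv cannot widen the 15-bit dividend lanes 0 and 3, so lshr 22 is known
; zero.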
define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

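; urem by 16 leaves at most 4 bits, so lshr 22 is known zero.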
define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

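; The urem result is smaller than the 15-bit divisor lanes 0 and 3, so
; lshr 22 is known zero.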
define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

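; Lanes 0 and 3 have their low 15 bits cleared, making them multiples of
; 16, so the srem is known zero.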
define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

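; bswap moves the set low bytes of lanes 0 and 3 into the high half, so
; shl 22 shifts them all out.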
define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

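; Every lane selected by the concat shuffle is masked to 17 bits, so the
; <8 x i32> uitofp lowers to a single vcvtdq2ps.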
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm1, %xmm1
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

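; lshr by 1 clears the top bit of each i64, so the shuffled upper i32
; halves are non-negative and the uitofp lowers to vcvtdq2ps.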
define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

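; The sign bits aren't proven zero through the smin/smax clamps here, so
; the uitofp keeps the full unsigned-to-float expansion.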
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

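; umin bounds every shuffled lane below 2^18, so the uitofp lowers to
; vcvtdq2ps.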
define <4 x float> @knownbits_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

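; Lanes 1 and 2 are umax'd with -1 and therefore all-ones, so the ashr by
; 31 folds away entirely.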
define <4 x i32> @knownbits_umax_shuffle_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_shuffle_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umax_shuffle_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}

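; The mask and umax bound the shuffled lanes below 2^18, so the uitofp
; lowers to vcvtdq2ps.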
define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

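; Clearing bit 0 before the bitreverse clears the sign bit afterwards, so
; ashr 31 is known zero.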
define <4 x i32> @knownbits_mask_bitreverse_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_bitreverse_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bitreverse_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 -2, i32 -2>
  %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %1)
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) nounwind readnone

; If we don't know that the input isn't INT_MIN, we can't combine to sitofp.
define <4 x float> @knownbits_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %a0
  %2 = icmp slt <4 x i32> %a0, zeroinitializer
  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> %a0
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

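; Setting the low bit makes the shuffled lanes odd, so abs cannot produce
; INT_MIN and the uitofp lowers to vpabsd + vcvtdq2ps.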
define <4 x float> @knownbits_or_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_or_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_or_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 1, i32 0, i32 3, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  %3 = sub <4 x i32> zeroinitializer, %2
  %4 = icmp slt <4 x i32> %2, zeroinitializer
  %5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> %2
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

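; Lanes 0 and 2 of both select operands are masked to at most 16 bits, so
; the shuffled lanes are non-negative and the uitofp lowers to vcvtdq2ps.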
define <4 x float> @knownbits_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm2, %xmm2
; X64-NEXT:    vandps {{.*}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a2, <i32 65535, i32 -1, i32 255, i32 -1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

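; Lanes 0 and 2 of both select operands have their sign bits cleared (by
; lshr or mask), so the uitofp lowers to vcvtdq2ps.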
define <4 x float> @knownbits_lshr_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vpsrld $5, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $5, %xmm2, %xmm2
; X64-NEXT:    vandps {{.*}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <4 x i32> %a2, <i32 5, i32 1, i32 5, i32 1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}
    679