; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

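; The tests below exercise the DAG's sign-bit tracking: in each case the i64
; (or vXi64) sitofp should narrow to a 32-bit convert. The per-test notes are
; inferred from the IR and the autogenerated checks.
;
; Both inserted elements are sign-extended from i32, so the <2 x i64> sitofp
; should lower as a single 32-bit vcvtdq2pd on both targets.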
define <2 x double> @signbits_sext_v2i64_sitofp_v2f64(i32 %a0, i32 %a1) nounwind {
; X32-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X32:       # %bb.0:
; X32-NEXT:    vcvtdq2pd {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i32 %a0 to i64
  %2 = sext i32 %a1 to i64
  %3 = insertelement <2 x i64> undef, i64 %1, i32 0
  %4 = insertelement <2 x i64> %3, i64 %2, i32 1
  %5 = sitofp <2 x i64> %4 to <2 x double>
  ret <2 x double> %5
}

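; All four elements are sign extensions (from i8/i16/i32), so the <4 x i64>
; sitofp should lower via the 32-bit vcvtdq2ps rather than scalar i64 converts.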
define <4 x float> @signbits_sext_v4i64_sitofp_v4f32(i8 signext %a0, i16 signext %a1, i32 %a2, i32 %a3) nounwind {
; X32-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X32:       # %bb.0:
; X32-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X64:       # %bb.0:
; X64-NEXT:    movslq %edi, %rax
; X64-NEXT:    movslq %esi, %rsi
; X64-NEXT:    movslq %edx, %rdx
; X64-NEXT:    movslq %ecx, %rcx
; X64-NEXT:    vmovq %rcx, %xmm0
; X64-NEXT:    vmovq %rdx, %xmm1
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT:    vmovq %rsi, %xmm1
; X64-NEXT:    vmovq %rax, %xmm2
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i8 %a0 to i64
  %2 = sext i16 %a1 to i64
  %3 = sext i32 %a2 to i64
  %4 = sext i32 %a3 to i64
  %5 = insertelement <4 x i64> undef, i64 %1, i32 0
  %6 = insertelement <4 x i64> %5, i64 %2, i32 1
  %7 = insertelement <4 x i64> %6, i64 %3, i32 2
  %8 = insertelement <4 x i64> %7, i64 %4, i32 3
  %9 = sitofp <4 x i64> %8 to <4 x float>
  ret <4 x float> %9
}

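; An ashr by 32 leaves at least 33 sign bits, so the extracted i64 should be
; converted with the 32-bit vcvtsi2ssl; on X32 this folds down to extracting
; the high dword of element 0.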
define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_extract_sitofp_0:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vextractps $1, %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 32>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

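; Same as above but with a non-uniform shift amount, which lowers via the
; srl+xor+sub sign-extension pattern; lane 0 still carries 33 sign bits, so
; the convert should stay 32-bit.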
define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_extract_sitofp_1:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $63, %xmm1, %xmm2
; X32-NEXT:    vpsrlq $32, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpsrlq $63, %xmm0, %xmm2
; X32-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $63, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [2147483648,1]
; X64-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 63>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

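; ashr by 61/60 leaves 62+ sign bits, so even after the shl by 20 the
; extracted lane keeps well over 33 sign bits and the convert should narrow
; to vcvtsi2ssl.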
define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_shl_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $60, %xmm1, %xmm2
; X32-NEXT:    vpsrlq $61, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpsllq $20, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $60, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,8]
; X64-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsllq $20, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shl <2 x i64> %1, <i64 20, i64 16>
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = sitofp i64 %3 to float
  ret float %4
}

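; A scalar ashr by 30 followed by a vector ashr by 3 guarantees 34 sign bits
; in lane 0, so the extracted i64 should convert with the 32-bit vcvtsi2ssl.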
define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shrdl $30, %ecx, %eax
; X32-NEXT:    sarl $30, %ecx
; X32-NEXT:    vmovd %eax, %xmm0
; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    sarq $30, %rdi
; X64-NEXT:    vmovq %rsi, %xmm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT:    vpsrad $3, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr i64 %a0, 30
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 %a1, i32 1
  %4 = ashr <2 x i64> %3, <i64 3, i64 3>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = sitofp i64 %5 to float
  ret float %6
}

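; The shuffle only selects lanes of the sign-extended operand (%a1 is never
; chosen), so the <4 x i64> sitofp should narrow to vcvtdq2pd.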
define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: signbits_sext_shuffle_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpmovsxdq %xmm0, %xmm1
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovsxdq %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X32-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_sext_shuffle_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxdq %xmm0, %xmm1
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovsxdq %xmm0, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = shufflevector <4 x i64> %1, <4 x i64> %a1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %3 = sitofp <4 x i64> %2 to <4 x double>
  ret <4 x double> %3
}

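; The extracted lanes pass through both ashr-by-16 shifts, leaving 33 sign
; bits, so the <2 x i64> sitofp should narrow to vcvtdq2pd.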
define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrad $16, %xmm0, %xmm1
; X32-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X32-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $16, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 16, i64 16>
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %3 = shufflevector <4 x i64> %a1, <4 x i64> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %4 = ashr <4 x i64> %3, <i64 16, i64 16, i64 16, i64 16>
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %6 = sitofp <2 x i64> %5 to <2 x double>
  ret <2 x double> %6
}

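; Lane 0 of the and is the ashr-by-61 value masked with a sign-extended i32
; that survives the shl/ashr-by-20 round trip; both sides keep 33+ sign bits
; in that lane, so the extracted i64 should convert with vcvtsi2ssl.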
define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2 x i64> %a1, i32 %a2) nounwind {
; X32-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $60, %xmm2, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm2, %xmm2
; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpsrlq $60, %xmm0, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
; X32-NEXT:    sarl $31, %eax
; X32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; X32-NEXT:    vpsllq $20, %xmm1, %xmm1
; X32-NEXT:    vpsrad $20, %xmm1, %xmm2
; X32-NEXT:    vpsrlq $20, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X32-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm4, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-NEXT:    movslq %edi, %rax
; X64-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm1
; X64-NEXT:    vpsllq $20, %xmm1, %xmm1
; X64-NEXT:    vpsrad $20, %xmm1, %xmm2
; X64-NEXT:    vpsrlq $20, %xmm1, %xmm1
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = sext i32 %a2 to i64
  %3 = insertelement <2 x i64> %a1, i64 %2, i32 0
  %4 = shl <2 x i64> %3, <i64 20, i64 20>
  %5 = ashr <2 x i64> %4, <i64 20, i64 20>
  %6 = and <2 x i64> %1, %5
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

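; Both bitop operands keep at least 33 sign bits in lane 0 (ashr by 61 and
; sext from i32), and and/or/xor preserve the minimum of their operands'
; sign bits, so the convert should stay 32-bit.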
define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $60, %xmm2, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm2, %xmm2
; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpsrlq $60, %xmm0, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpmovsxdq %xmm1, %xmm1
; X32-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X32-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X32-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm4, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm1, %xmm1
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X64-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X64-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  %4 = and <2 x i64> %1, %3
  %5 = or <2 x i64> %4, %3
  %6 = xor <2 x i64> %5, %1
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

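; Both select operands carry at least 33 sign bits (ashr by 33/63 and sext
; from i32), so after the shuffle the <4 x i64> sitofp should lower with
; vcvtdq2ps.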
define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovdqa {{.*#+}} xmm3 = [33,0,63,0]
; X32-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq %xmm3, %xmm4, %xmm5
; X32-NEXT:    vpshufd {{.*#+}} xmm6 = xmm3[2,3,0,1]
; X32-NEXT:    vpsrlq %xmm6, %xmm4, %xmm4
; X32-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; X32-NEXT:    vextractf128 $1, %ymm2, %xmm5
; X32-NEXT:    vpsrlq %xmm6, %xmm5, %xmm7
; X32-NEXT:    vpsrlq %xmm3, %xmm5, %xmm5
; X32-NEXT:    vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
; X32-NEXT:    vpsrlq %xmm6, %xmm2, %xmm6
; X32-NEXT:    vpsrlq %xmm3, %xmm2, %xmm2
; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; X32-NEXT:    vpmovsxdq 16(%ebp), %xmm3
; X32-NEXT:    vpxor %xmm4, %xmm5, %xmm5
; X32-NEXT:    vpsubq %xmm4, %xmm5, %xmm5
; X32-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; X32-NEXT:    vpsubq %xmm4, %xmm2, %xmm2
; X32-NEXT:    vpmovsxdq 8(%ebp), %xmm4
; X32-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
; X32-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; X32-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm5
; X32-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
; X32-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; X32-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X32-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm2, %xmm4
; X64-NEXT:    vpsrlq $63, %xmm4, %xmm5
; X64-NEXT:    vpsrlq $33, %xmm4, %xmm4
; X64-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm5[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm5 = [1073741824,1]
; X64-NEXT:    vpxor %xmm5, %xmm4, %xmm4
; X64-NEXT:    vpsubq %xmm5, %xmm4, %xmm4
; X64-NEXT:    vpsrlq $63, %xmm2, %xmm6
; X64-NEXT:    vpsrlq $33, %xmm2, %xmm2
; X64-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; X64-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; X64-NEXT:    vpsubq %xmm5, %xmm2, %xmm2
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; X64-NEXT:    vpmovsxdq %xmm3, %xmm4
; X64-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X64-NEXT:    vpmovsxdq %xmm3, %xmm3
; X64-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm5
; X64-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; X64-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X64-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %1 = ashr <4 x i64> %a2, <i64 33, i64 63, i64 33, i64 63>
  %2 = sext <4 x i32> %a3 to <4 x i64>
  %3 = icmp eq <4 x i64> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = sitofp <4 x i64> %5 to <4 x float>
  ret <4 x float> %6
}
    459