; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; 32-bit tests to make sure we're not doing anything stupid.
; RUN: llc < %s -mtriple=i686-unknown-unknown
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2

;
; Signed Integer to Double
;

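; Editor's note (a reading aid, not part of the checked output): there is no
; packed i64 -> f64 instruction here (that only arrives with AVX-512), so the
; 64-bit element cases below are scalarized - each element is moved to a GPR,
; converted with cvtsi2sdq, and the results are re-packed with unpcklpd.
; Narrower sources are first sign-extended to i32 (punpck plus psrad on SSE,
; pmovsx on AVX) and then use the packed cvtdq2pd.
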
define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
; SSE-LABEL: sitofp_2i64_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    cvtsi2sdq %rax, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_2i64_to_2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrq $1, %xmm0, %rax
; AVX-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm1
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = sitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %cvt
}

define <2 x double> @sitofp_2i32_to_2f64(<4 x i32> %a) {
; SSE-LABEL: sitofp_2i32_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_2i32_to_2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i32> %shuf to <2 x double>
  ret <2 x double> %cvt
}

define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
; SSE-LABEL: sitofp_4i32_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_4i32_to_2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = sitofp <4 x i32> %a to <4 x double>
  %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

define <2 x double> @sitofp_2i16_to_2f64(<8 x i16> %a) {
; SSE-LABEL: sitofp_2i16_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_2i16_to_2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i16> %shuf to <2 x double>
  ret <2 x double> %cvt
}

define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
; SSE-LABEL: sitofp_8i16_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_8i16_to_2f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_8i16_to_2f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = sitofp <8 x i16> %a to <8 x double>
  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

define <2 x double> @sitofp_2i8_to_2f64(<16 x i8> %a) {
; SSE-LABEL: sitofp_2i8_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_2i8_to_2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i8> %shuf to <2 x double>
  ret <2 x double> %cvt
}

define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
; SSE-LABEL: sitofp_16i8_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_16i8_to_2f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_16i8_to_2f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = sitofp <16 x i8> %a to <16 x double>
  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
; SSE-LABEL: sitofp_4i64_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    cvtsi2sdq %rax, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    cvtsi2sdq %rax, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_4i64_to_4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm2
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm2
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_4i64_to_4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm2
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm1
; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm2
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = sitofp <4 x i64> %a to <4 x double>
  ret <4 x double> %cvt
}

define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) {
; SSE-LABEL: sitofp_4i32_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_4i32_to_4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    retq
  %cvt = sitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %cvt
}

define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) {
; SSE-LABEL: sitofp_4i16_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    psrad $16, %xmm1
; SSE-NEXT:    cvtdq2pd %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT:    cvtdq2pd %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_4i16_to_4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = sitofp <4 x i16> %shuf to <4 x double>
  ret <4 x double> %cvt
}

define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
; SSE-LABEL: sitofp_8i16_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    psrad $16, %xmm1
; SSE-NEXT:    cvtdq2pd %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT:    cvtdq2pd %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_8i16_to_4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_8i16_to_4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = sitofp <8 x i16> %a to <8 x double>
  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %shuf
}

define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) {
; SSE-LABEL: sitofp_4i8_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    psrad $24, %xmm1
; SSE-NEXT:    cvtdq2pd %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT:    cvtdq2pd %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_4i8_to_4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = sitofp <4 x i8> %shuf to <4 x double>
  ret <4 x double> %cvt
}

define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
; SSE-LABEL: sitofp_16i8_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    psrad $24, %xmm1
; SSE-NEXT:    cvtdq2pd %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT:    cvtdq2pd %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_16i8_to_4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_16i8_to_4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = sitofp <16 x i8> %a to <16 x double>
  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %shuf
}

;
; Unsigned Integer to Double
;

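; Editor's note: the 64-bit lowerings below use the classic exponent-bias
; trick rather than a scalar convert, since there is no unsigned cvtsi2sd.
; Each u64 x is split as x = hi * 2^32 + lo, and each 32-bit half is
; interleaved (punpckldq) with a fixed high dword so it lands in the mantissa
; of a double. With 1127219200 = 0x43300000 (high word of 2^52) and
; 1160773632 = 0x45300000 (high word of 2^84), the arithmetic is:
;
;   double(0x4330000000000000 | lo) - 2^52 == (double)lo
;   double(0x4530000000000000 | hi) - 2^84 == (double)hi * 2^32
;   sum of the two halves                  == (double)x
;
; which is exactly what the punpckldq/subpd/addpd (vhaddpd on AVX) sequences
; checked below compute; [4.503600e+15,1.934281e+25] is [2^52, 2^84].
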
define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
; SSE-LABEL: uitofp_2i64_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; SSE-NEXT:    subpd %xmm3, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE-NEXT:    addpd %xmm4, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    subpd %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSE-NEXT:    addpd %xmm2, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: uitofp_2i64_to_2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; AVX-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT:    vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; AVX-NEXT:    vsubpd %xmm3, %xmm2, %xmm2
; AVX-NEXT:    vhaddpd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT:    vsubpd %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = uitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %cvt
}

define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) {
; SSE-LABEL: uitofp_2i32_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; SSE-NEXT:    subpd %xmm3, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE-NEXT:    addpd %xmm4, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    subpd %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSE-NEXT:    addpd %xmm2, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: uitofp_2i32_to_2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; AVX-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT:    vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; AVX-NEXT:    vsubpd %xmm3, %xmm2, %xmm2
; AVX-NEXT:    vhaddpd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT:    vsubpd %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %cvt = uitofp <2 x i32> %shuf to <2 x double>
  ret <2 x double> %cvt
}

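; Editor's note: for u32 -> f64 the AVX lowerings below avoid the bias trick
; entirely. The input is split into 16-bit halves (vpand keeps the low bits,
; vpsrld $16 the high bits), each half is converted exactly with the signed
; vcvtdq2pd (safe, as both halves fit in 31 bits), and the high half is
; rescaled before the final vaddpd. The memory operands are hidden behind
; {{.*}}(%rip), but they are presumably the 0xffff mask and the 65536.0
; (2^16) multiplier that this decomposition calls for.
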
define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
; SSE-LABEL: uitofp_4i32_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; SSE-NEXT:    subpd %xmm3, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE-NEXT:    addpd %xmm4, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    subpd %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSE-NEXT:    addpd %xmm2, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i32_to_2f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vcvtdq2pd %xmm1, %ymm1
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i32_to_2f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vcvtdq2pd %xmm1, %ymm1
; AVX2-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vmulpd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = uitofp <4 x i32> %a to <4 x double>
  %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

define <2 x double> @uitofp_2i16_to_2f64(<8 x i16> %a) {
; SSE-LABEL: uitofp_2i16_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uitofp_2i16_to_2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %cvt = uitofp <2 x i16> %shuf to <2 x double>
  ret <2 x double> %cvt
}

define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
; SSE-LABEL: uitofp_8i16_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_8i16_to_2f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_8i16_to_2f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = uitofp <8 x i16> %a to <8 x double>
  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

define <2 x double> @uitofp_2i8_to_2f64(<16 x i8> %a) {
; SSE-LABEL: uitofp_2i8_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uitofp_2i8_to_2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %cvt = uitofp <2 x i8> %shuf to <2 x double>
  ret <2 x double> %cvt
}

define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
; SSE-LABEL: uitofp_16i8_to_2f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_16i8_to_2f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_16i8_to_2f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = uitofp <16 x i8> %a to <16 x double>
  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
; SSE-LABEL: uitofp_4i64_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; SSE-NEXT:    subpd %xmm4, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
; SSE-NEXT:    addpd %xmm5, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT:    subpd %xmm4, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
; SSE-NEXT:    addpd %xmm3, %xmm5
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    subpd %xmm4, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
; SSE-NEXT:    addpd %xmm5, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT:    subpd %xmm4, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE-NEXT:    addpd %xmm3, %xmm2
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i64_to_4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT:    vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX1-NEXT:    vsubpd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vhaddpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT:    vsubpd %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vhaddpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT:    vsubpd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vhaddpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT:    vsubpd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i64_to_4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT:    vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX2-NEXT:    vsubpd %xmm4, %xmm3, %xmm3
; AVX2-NEXT:    vhaddpd %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT:    vsubpd %xmm4, %xmm1, %xmm1
; AVX2-NEXT:    vhaddpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT:    vsubpd %xmm4, %xmm3, %xmm3
; AVX2-NEXT:    vhaddpd %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT:    vsubpd %xmm4, %xmm0, %xmm0
; AVX2-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = uitofp <4 x i64> %a to <4 x double>
  ret <4 x double> %cvt
}

define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
; SSE-LABEL: uitofp_4i32_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT:    movapd {{.*#+}} xmm5 = [4.503600e+15,1.934281e+25]
; SSE-NEXT:    subpd %xmm5, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
; SSE-NEXT:    addpd %xmm6, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; SSE-NEXT:    subpd %xmm5, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[2,3,0,1]
; SSE-NEXT:    addpd %xmm4, %xmm6
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSE-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    subpd %xmm5, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSE-NEXT:    addpd %xmm2, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; SSE-NEXT:    subpd %xmm5, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
; SSE-NEXT:    addpd %xmm4, %xmm2
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i32_to_4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vcvtdq2pd %xmm1, %ymm1
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i32_to_4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vcvtdq2pd %xmm1, %ymm1
; AVX2-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vmulpd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %cvt = uitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %cvt
}

define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) {
; SSE-LABEL: uitofp_4i16_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uitofp_4i16_to_4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = uitofp <4 x i16> %shuf to <4 x double>
  ret <4 x double> %cvt
}

define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) {
; SSE-LABEL: uitofp_8i16_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_8i16_to_4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_8i16_to_4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = uitofp <8 x i16> %a to <8 x double>
  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %shuf
}

define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) {
; SSE-LABEL: uitofp_4i8_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uitofp_4i8_to_4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = uitofp <4 x i8> %shuf to <4 x double>
  ret <4 x double> %cvt
}

define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
; SSE-LABEL: uitofp_16i8_to_4f64:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_16i8_to_4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_16i8_to_4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = uitofp <16 x i8> %a to <16 x double>
  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %shuf
}

;
; Signed Integer to Float
;

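; Editor's note: the f32 cases follow the same pattern as the f64 ones above:
; i64 elements are scalarized through cvtsi2ssq and reassembled with unpcklps
; (insertps on AVX), while i32 and narrower sources are sign-extended and
; converted with the packed cvtdq2ps.
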
define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
; SSE-LABEL: sitofp_2i64_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_2i64_to_4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrq $1, %xmm0, %rax
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX-NEXT:    retq
  %cvt = sitofp <2 x i64> %a to <2 x float>
  %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x float> %ext
}

define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-LABEL: sitofp_4i64_to_4f32_undef:
; SSE:       # BB#0:
; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_4i64_to_4f32_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrq $1, %xmm0, %rax
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX-NEXT:    retq
  %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = sitofp <4 x i64> %ext to <4 x float>
  ret <4 x float> %cvt
}

define <4 x float> @sitofp_4i32_to_4f32(<4 x i32> %a) {
; SSE-LABEL: sitofp_4i32_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_4i32_to_4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = sitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %cvt
}

define <4 x float> @sitofp_4i16_to_4f32(<8 x i16> %a) {
; SSE-LABEL: sitofp_4i16_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_4i16_to_4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = sitofp <4 x i16> %shuf to <4 x float>
  ret <4 x float> %cvt
}

define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
; SSE-LABEL: sitofp_8i16_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_8i16_to_4f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_8i16_to_4f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = sitofp <8 x i16> %a to <8 x float>
  %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuf
}

define <4 x float> @sitofp_4i8_to_4f32(<16 x i8> %a) {
; SSE-LABEL: sitofp_4i8_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_4i8_to_4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = sitofp <4 x i8> %shuf to <4 x float>
  ret <4 x float> %cvt
}

define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
; SSE-LABEL: sitofp_16i8_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_16i8_to_4f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_16i8_to_4f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = sitofp <16 x i8> %a to <16 x float>
  %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuf
}

define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-LABEL: sitofp_4i64_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_4i64_to_4f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_4i64_to_4f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = sitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %cvt
}

define <8 x float> @sitofp_8i32_to_8f32(<8 x i32> %a) {
; SSE-LABEL: sitofp_8i32_to_8f32:
; SSE:       # BB#0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: sitofp_8i32_to_8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = sitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %cvt
}

define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) {
; SSE-LABEL: sitofp_8i16_to_8f32:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    psrad $16, %xmm1
; SSE-NEXT:    cvtdq2ps %xmm1, %xmm2
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_8i16_to_8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_8i16_to_8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = sitofp <8 x i16> %a to <8 x float>
  ret <8 x float> %cvt
}

define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) {
; SSE-LABEL: sitofp_8i8_to_8f32:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm1
; SSE-NEXT:    cvtdq2ps %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_8i8_to_8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_8i8_to_8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vpslld $24, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $24, %ymm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %cvt = sitofp <8 x i8> %shuf to <8 x float>
  ret <8 x float> %cvt
}

define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
; SSE-LABEL: sitofp_16i8_to_8f32:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm1
; SSE-NEXT:    cvtdq2ps %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: sitofp_16i8_to_8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sitofp_16i8_to_8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = sitofp <16 x i8> %a to <16 x float>
  %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuf
}

;
; Unsigned Integer to Float
;

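; Editor's note: there is no unsigned cvtsi2ss either, and a u64 with its top
; bit set is out of range for the signed convert, so the lowering branches
; (the .LBB labels below). Non-negative values use cvtsi2ssq directly; values
; with the sign bit set are halved first and the result doubled afterwards,
; with the low bit OR'd back in before the halving so the dropped bit still
; participates in rounding ("round to odd", which avoids a double-rounding
; error). Roughly, for x >= 2^63:
;
;   f = cvtsi2ssq((x >> 1) | (x & 1)); f = f + f;
;
; which matches the shrq/orq/cvtsi2ssq/addss sequences checked below.
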
   1137 define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
   1138 ; SSE-LABEL: uitofp_2i64_to_4f32:
   1139 ; SSE:       # BB#0:
   1140 ; SSE-NEXT:    movdqa %xmm0, %xmm1
   1141 ; SSE-NEXT:    movd %xmm1, %rax
   1142 ; SSE-NEXT:    movl %eax, %ecx
   1143 ; SSE-NEXT:    andl $1, %ecx
   1144 ; SSE-NEXT:    testq %rax, %rax
   1145 ; SSE-NEXT:    js .LBB38_1
   1146 ; SSE-NEXT:  # BB#2:
   1147 ; SSE-NEXT:    xorps %xmm0, %xmm0
   1148 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
   1149 ; SSE-NEXT:    jmp .LBB38_3
   1150 ; SSE-NEXT:  .LBB38_1:
   1151 ; SSE-NEXT:    shrq %rax
   1152 ; SSE-NEXT:    orq %rax, %rcx
   1153 ; SSE-NEXT:    xorps %xmm0, %xmm0
   1154 ; SSE-NEXT:    cvtsi2ssq %rcx, %xmm0
   1155 ; SSE-NEXT:    addss %xmm0, %xmm0
   1156 ; SSE-NEXT:  .LBB38_3:
   1157 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1158 ; SSE-NEXT:    movd %xmm1, %rax
   1159 ; SSE-NEXT:    movl %eax, %ecx
   1160 ; SSE-NEXT:    andl $1, %ecx
   1161 ; SSE-NEXT:    testq %rax, %rax
   1162 ; SSE-NEXT:    js .LBB38_4
   1163 ; SSE-NEXT:  # BB#5:
   1164 ; SSE-NEXT:    xorps %xmm1, %xmm1
   1165 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
   1166 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1167 ; SSE-NEXT:    retq
   1168 ; SSE-NEXT:  .LBB38_4:
   1169 ; SSE-NEXT:    shrq %rax
   1170 ; SSE-NEXT:    orq %rax, %rcx
   1171 ; SSE-NEXT:    xorps %xmm1, %xmm1
   1172 ; SSE-NEXT:    cvtsi2ssq %rcx, %xmm1
   1173 ; SSE-NEXT:    addss %xmm1, %xmm1
   1174 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1175 ; SSE-NEXT:    retq
   1176 ;
   1177 ; AVX-LABEL: uitofp_2i64_to_4f32:
   1178 ; AVX:       # BB#0:
   1179 ; AVX-NEXT:    vpextrq $1, %xmm0, %rax
   1180 ; AVX-NEXT:    movl %eax, %ecx
   1181 ; AVX-NEXT:    andl $1, %ecx
   1182 ; AVX-NEXT:    testq %rax, %rax
   1183 ; AVX-NEXT:    js .LBB38_1
   1184 ; AVX-NEXT:  # BB#2:
   1185 ; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
   1186 ; AVX-NEXT:    jmp .LBB38_3
   1187 ; AVX-NEXT:  .LBB38_1:
   1188 ; AVX-NEXT:    shrq %rax
   1189 ; AVX-NEXT:    orq %rax, %rcx
   1190 ; AVX-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm1
   1191 ; AVX-NEXT:    vaddss %xmm1, %xmm1, %xmm1
   1192 ; AVX-NEXT:  .LBB38_3:
   1193 ; AVX-NEXT:    vmovq %xmm0, %rax
   1194 ; AVX-NEXT:    movl %eax, %ecx
   1195 ; AVX-NEXT:    andl $1, %ecx
   1196 ; AVX-NEXT:    testq %rax, %rax
   1197 ; AVX-NEXT:    js .LBB38_4
   1198 ; AVX-NEXT:  # BB#5:
   1199 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1200 ; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
   1201 ; AVX-NEXT:    jmp .LBB38_6
   1202 ; AVX-NEXT:  .LBB38_4:
   1203 ; AVX-NEXT:    shrq %rax
   1204 ; AVX-NEXT:    orq %rax, %rcx
   1205 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1206 ; AVX-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
   1207 ; AVX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
   1208 ; AVX-NEXT:  .LBB38_6:
   1209 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
   1210 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1211 ; AVX-NEXT:    testq %rax, %rax
   1212 ; AVX-NEXT:    js .LBB38_8
   1213 ; AVX-NEXT:  # BB#7:
   1214 ; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
   1215 ; AVX-NEXT:  .LBB38_8:
   1216 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
   1217 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
   1218 ; AVX-NEXT:    retq
   1219   %cvt = uitofp <2 x i64> %a to <2 x float>
   1220   %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   1221   ret <4 x float> %ext
   1222 }
   1223 
define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-LABEL: uitofp_4i64_to_4f32_undef:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    js .LBB39_2
; SSE-NEXT:  # BB#1:
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
; SSE-NEXT:  .LBB39_2:
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    andl $1, %ecx
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    js .LBB39_3
; SSE-NEXT:  # BB#4:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
; SSE-NEXT:    jmp .LBB39_5
; SSE-NEXT:  .LBB39_3:
; SSE-NEXT:    shrq %rax
; SSE-NEXT:    orq %rax, %rcx
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssq %rcx, %xmm0
; SSE-NEXT:    addss %xmm0, %xmm0
; SSE-NEXT:  .LBB39_5:
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    andl $1, %ecx
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    js .LBB39_6
; SSE-NEXT:  # BB#7:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
; SSE-NEXT:    jmp .LBB39_8
; SSE-NEXT:  .LBB39_6:
; SSE-NEXT:    shrq %rax
; SSE-NEXT:    orq %rax, %rcx
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cvtsi2ssq %rcx, %xmm1
; SSE-NEXT:    addss %xmm1, %xmm1
; SSE-NEXT:  .LBB39_8:
; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: uitofp_4i64_to_4f32_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrq $1, %xmm0, %rax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    andl $1, %ecx
; AVX-NEXT:    testq %rax, %rax
; AVX-NEXT:    js .LBB39_1
; AVX-NEXT:  # BB#2:
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX-NEXT:    jmp .LBB39_3
; AVX-NEXT:  .LBB39_1:
; AVX-NEXT:    shrq %rax
; AVX-NEXT:    orq %rax, %rcx
; AVX-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm1, %xmm1
; AVX-NEXT:  .LBB39_3:
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    andl $1, %ecx
; AVX-NEXT:    testq %rax, %rax
; AVX-NEXT:    js .LBB39_4
; AVX-NEXT:  # BB#5:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX-NEXT:    jmp .LBB39_6
; AVX-NEXT:  .LBB39_4:
; AVX-NEXT:    shrq %rax
; AVX-NEXT:    orq %rax, %rcx
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; AVX-NEXT:  .LBB39_6:
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    testq %rax, %rax
; AVX-NEXT:    js .LBB39_8
; AVX-NEXT:  # BB#7:
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX-NEXT:  .LBB39_8:
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX-NEXT:    retq
  %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = uitofp <4 x i64> %ext to <4 x float>
  ret <4 x float> %cvt
}

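; The unsigned i32 -> f32 lowerings below are branch-free: each element is
; split into 16-bit halves, and each half is OR'ed into the mantissa of a
; float with a fixed exponent (0x4B000000 = 1258291200 encodes 2^23, and
; 0x53000000 = 1392508928 encodes 2^39; both constants appear as explicit
; vectors in the 8i32 SSE test further down). Subtracting the combined bias
; 2^39 + 2^23 = 549764202496 (the -5.497642e+11 constant) and then adding the
; halves reconstructs the value with one correctly rounded addition. A scalar
; C sketch of the idea (helper name is ours; needs <stdint.h> and <string.h>):
;
;   float u32_to_f32(uint32_t x) {
;     uint32_t lobits = (x & 0xFFFF) | 0x4B000000u;  /* 2^23 + low16         */
;     uint32_t hibits = (x >> 16)    | 0x53000000u;  /* 2^39 + high16 * 2^16 */
;     float lo, hi;
;     memcpy(&lo, &lobits, sizeof lo);
;     memcpy(&hi, &hibits, sizeof hi);
;     return (hi - 549764202496.0f) + lo;            /* bias cancels exactly */
;   }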
define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) {
; SSE-LABEL: uitofp_4i32_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    por {{.*}}(%rip), %xmm1
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    por {{.*}}(%rip), %xmm0
; SSE-NEXT:    addps {{.*}}(%rip), %xmm0
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i32_to_4f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i32_to_4f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX2-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
  %cvt = uitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %cvt
}

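; Unsigned i16 and i8 sources need no bias tricks: zero-extending to i32
; always lands in the non-negative range of a signed i32, so the ordinary
; signed cvtdq2ps handles them, as in (float)(int32_t)(uint32_t)x16 in C.
; The checks below are therefore mostly about which zero-extension shuffles
; get picked ahead of the convert on each subtarget.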
define <4 x float> @uitofp_4i16_to_4f32(<8 x i16> %a) {
; SSE-LABEL: uitofp_4i16_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uitofp_4i16_to_4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = uitofp <4 x i16> %shuf to <4 x float>
  ret <4 x float> %cvt
}

define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
; SSE-LABEL: uitofp_8i16_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_8i16_to_4f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_8i16_to_4f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = uitofp <8 x i16> %a to <8 x float>
  %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuf
}

define <4 x float> @uitofp_4i8_to_4f32(<16 x i8> %a) {
; SSE-LABEL: uitofp_4i8_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uitofp_4i8_to_4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = uitofp <4 x i8> %shuf to <4 x float>
  ret <4 x float> %cvt
}

define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
; SSE-LABEL: uitofp_16i8_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_16i8_to_4f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_16i8_to_4f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = uitofp <16 x i8> %a to <16 x float>
  %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuf
}

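; uitofp_4i64_to_4f32 repeats the scalar sign-bit/round-to-odd sequence
; sketched after uitofp_2i64_to_4f32 once per lane; the only new wrinkle is
; that the AVX variants must first pull the upper two lanes out of the ymm
; register (vextractf128 on AVX1, vextracti128 on AVX2) before running the
; same extract/convert/insert dance on them.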
define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-LABEL: uitofp_4i64_to_4f32:
; SSE:       # BB#0:
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    andl $1, %ecx
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    js .LBB45_1
; SSE-NEXT:  # BB#2:
; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
; SSE-NEXT:    jmp .LBB45_3
; SSE-NEXT:  .LBB45_1:
; SSE-NEXT:    shrq %rax
; SSE-NEXT:    orq %rax, %rcx
; SSE-NEXT:    cvtsi2ssq %rcx, %xmm3
; SSE-NEXT:    addss %xmm3, %xmm3
; SSE-NEXT:  .LBB45_3:
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    andl $1, %ecx
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    js .LBB45_4
; SSE-NEXT:  # BB#5:
; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
; SSE-NEXT:    jmp .LBB45_6
; SSE-NEXT:  .LBB45_4:
; SSE-NEXT:    shrq %rax
; SSE-NEXT:    orq %rax, %rcx
; SSE-NEXT:    cvtsi2ssq %rcx, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm2
; SSE-NEXT:  .LBB45_6:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    andl $1, %ecx
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    js .LBB45_7
; SSE-NEXT:  # BB#8:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
; SSE-NEXT:    jmp .LBB45_9
; SSE-NEXT:  .LBB45_7:
; SSE-NEXT:    shrq %rax
; SSE-NEXT:    orq %rax, %rcx
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cvtsi2ssq %rcx, %xmm1
; SSE-NEXT:    addss %xmm1, %xmm1
; SSE-NEXT:  .LBB45_9:
; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    andl $1, %ecx
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    js .LBB45_10
; SSE-NEXT:  # BB#11:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
; SSE-NEXT:    jmp .LBB45_12
; SSE-NEXT:  .LBB45_10:
; SSE-NEXT:    shrq %rax
; SSE-NEXT:    orq %rax, %rcx
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssq %rcx, %xmm0
; SSE-NEXT:    addss %xmm0, %xmm0
; SSE-NEXT:  .LBB45_12:
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_4i64_to_4f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    js .LBB45_1
; AVX1-NEXT:  # BB#2:
; AVX1-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX1-NEXT:    jmp .LBB45_3
; AVX1-NEXT:  .LBB45_1:
; AVX1-NEXT:    shrq %rax
; AVX1-NEXT:    orq %rax, %rcx
; AVX1-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm1
; AVX1-NEXT:    vaddss %xmm1, %xmm1, %xmm1
; AVX1-NEXT:  .LBB45_3:
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    js .LBB45_4
; AVX1-NEXT:  # BB#5:
; AVX1-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX1-NEXT:    jmp .LBB45_6
; AVX1-NEXT:  .LBB45_4:
; AVX1-NEXT:    shrq %rax
; AVX1-NEXT:    orq %rax, %rcx
; AVX1-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm2
; AVX1-NEXT:    vaddss %xmm2, %xmm2, %xmm2
; AVX1-NEXT:  .LBB45_6:
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    js .LBB45_7
; AVX1-NEXT:  # BB#8:
; AVX1-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX1-NEXT:    jmp .LBB45_9
; AVX1-NEXT:  .LBB45_7:
; AVX1-NEXT:    shrq %rax
; AVX1-NEXT:    orq %rax, %rcx
; AVX1-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm2
; AVX1-NEXT:    vaddss %xmm2, %xmm2, %xmm2
; AVX1-NEXT:  .LBB45_9:
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    js .LBB45_10
; AVX1-NEXT:  # BB#11:
; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
; AVX1-NEXT:  .LBB45_10:
; AVX1-NEXT:    shrq %rax
; AVX1-NEXT:    orq %rax, %rcx
; AVX1-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_4i64_to_4f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    testq %rax, %rax
; AVX2-NEXT:    js .LBB45_1
; AVX2-NEXT:  # BB#2:
; AVX2-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm1
; AVX2-NEXT:    jmp .LBB45_3
; AVX2-NEXT:  .LBB45_1:
; AVX2-NEXT:    shrq %rax
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm1
; AVX2-NEXT:    vaddss %xmm1, %xmm1, %xmm1
; AVX2-NEXT:  .LBB45_3:
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    testq %rax, %rax
; AVX2-NEXT:    js .LBB45_4
; AVX2-NEXT:  # BB#5:
; AVX2-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX2-NEXT:    jmp .LBB45_6
; AVX2-NEXT:  .LBB45_4:
; AVX2-NEXT:    shrq %rax
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm2
; AVX2-NEXT:    vaddss %xmm2, %xmm2, %xmm2
; AVX2-NEXT:  .LBB45_6:
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    testq %rax, %rax
; AVX2-NEXT:    js .LBB45_7
; AVX2-NEXT:  # BB#8:
; AVX2-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX2-NEXT:    jmp .LBB45_9
; AVX2-NEXT:  .LBB45_7:
; AVX2-NEXT:    shrq %rax
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm2
; AVX2-NEXT:    vaddss %xmm2, %xmm2, %xmm2
; AVX2-NEXT:  .LBB45_9:
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    andl $1, %ecx
; AVX2-NEXT:    testq %rax, %rax
; AVX2-NEXT:    js .LBB45_10
; AVX2-NEXT:  # BB#11:
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
; AVX2-NEXT:  .LBB45_10:
; AVX2-NEXT:    shrq %rax
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX2-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cvt = uitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %cvt
}

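; The 256-bit unsigned i32 conversion reuses the same 16-bit halving. The SSE
; and AVX2 outputs below keep the bias method, while the AVX1 variant scales
; instead of biasing: the low halves are converted directly (they fit in a
; signed i32) and the high halves are converted and then multiplied by 2^16;
; both intermediate values are exact, so the final vaddps is the only
; rounding step. A scalar C sketch (helper name is ours; needs <stdint.h>):
;
;   float u32_to_f32_mul(uint32_t x) {
;     return (float)(int32_t)(x >> 16) * 65536.0f   /* exact: 16 bits * 2^16 */
;          + (float)(int32_t)(x & 0xFFFF);          /* exact: 16 bits        */
;   }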
define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) {
; SSE-LABEL: uitofp_8i32_to_8f32:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    pand %xmm2, %xmm3
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
; SSE-NEXT:    por %xmm4, %xmm3
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
; SSE-NEXT:    por %xmm5, %xmm0
; SSE-NEXT:    movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
; SSE-NEXT:    addps %xmm6, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    por %xmm4, %xmm2
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    por %xmm5, %xmm1
; SSE-NEXT:    addps %xmm6, %xmm1
; SSE-NEXT:    addps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_8i32_to_8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm1
; AVX1-NEXT:    vcvtdq2ps %ymm1, %ymm1
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_8i32_to_8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %cvt = uitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %cvt
}

define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) {
; SSE-LABEL: uitofp_8i16_to_8f32:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm2, %xmm2
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_8i16_to_8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_8i16_to_8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = uitofp <8 x i16> %a to <8 x float>
  ret <8 x float> %cvt
}

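; In the AVX1 uitofp_8i8_to_8f32 output below, vpunpckhwd interleaves with
; the original (non-zeroed) source register, so the following vpand with the
; [255,0,0,0,...] mask is what actually clears the garbage lanes; the second
; vpand after vpmovzxbd looks redundant but is harmless.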
define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) {
; SSE-LABEL: uitofp_8i8_to_8f32:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm2, %xmm2
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_8i8_to_8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_8i8_to_8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %cvt = uitofp <8 x i8> %shuf to <8 x float>
  ret <8 x float> %cvt
}

define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
; SSE-LABEL: uitofp_16i8_to_8f32:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT:    cvtdq2ps %xmm2, %xmm2
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uitofp_16i8_to_8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uitofp_16i8_to_8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cvt = uitofp <16 x i8> %a to <16 x float>
  %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuf
}

;
; Aggregates
;

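; %Arguments is a packed struct, so its fields sit at byte offsets 0, 8 and
; 24; that is where the 8(%rdi) vector load and 24(%rdi) pointer load in the
; checks below come from. An equivalent C view, with the arrays standing in
; for the vector types (struct and field names are ours; needs <stdint.h>):
;
;   struct __attribute__((packed)) Arguments {
;     uint8_t  v8i8[8];    /* offset 0                            */
;     uint16_t v8i16[8];   /* offset 8  -> movdqu/vmovdqu 8(%rdi) */
;     float   *out;        /* offset 24 -> movq 24(%rdi)          */
;   };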
%Arguments = type <{ <8 x i8>, <8 x i16>, <8 x float>* }>
define void @aggregate_sitofp_8i16_to_8f32(%Arguments* nocapture readonly %a0) {
; SSE-LABEL: aggregate_sitofp_8i16_to_8f32:
; SSE:       # BB#0:
; SSE-NEXT:    movq 24(%rdi), %rax
; SSE-NEXT:    movdqu 8(%rdi), %xmm0
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    psrad $16, %xmm1
; SSE-NEXT:    cvtdq2ps %xmm1, %xmm1
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    movaps %xmm0, 16(%rax)
; SSE-NEXT:    movaps %xmm1, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: aggregate_sitofp_8i16_to_8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    movq 24(%rdi), %rax
; AVX1-NEXT:    vmovdqu 8(%rdi), %xmm0
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vmovaps %ymm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: aggregate_sitofp_8i16_to_8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    movq 24(%rdi), %rax
; AVX2-NEXT:    vpmovsxwd 8(%rdi), %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    vmovaps %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
 %1 = load %Arguments, %Arguments* %a0, align 1
 %2 = extractvalue %Arguments %1, 1
 %3 = extractvalue %Arguments %1, 2
 %4 = sitofp <8 x i16> %2 to <8 x float>
 store <8 x float> %4, <8 x float>* %3, align 32
 ret void
}