; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
;
; 32-bit tests to make sure we're not doing anything stupid.
; RUN: llc < %s -mtriple=i686-unknown-unknown
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
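;
; Note: the x86_64 RUN lines pipe llc output into FileCheck with layered
; --check-prefix values (ALL/SSE/SSE2 and ALL/AVX/AVX1) so that shared and
; configuration-specific CHECK lines can coexist in one file. The i686 RUN
; lines have no FileCheck counterpart on purpose: they only assert that
; codegen completes without crashing.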

;
; Double to Signed Integer
;

define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}
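
; Note: neither SSE2 nor AVX1 has a packed f64 -> i64 truncating convert
; (that only arrives with AVX-512), so both targets scalarize, as checked
; above: each lane is moved to the low position, converted into a GPR with
; cvttsd2si, moved back into an XMM register, and the halves are re-packed
; with punpcklqdq. A rough sketch of what the SSE block computes:
;
;   rax  = trunc_i64(a[0])        ; cvttsd2si
;   xmm1 = rax                    ; movd
;   rax  = trunc_i64(a[1])        ; shufpd + cvttsd2si
;   xmm0 = rax                    ; movd
;   xmm0 = pack(xmm1, xmm0)       ; punpcklqdq + movdqa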

define <4 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    # kill
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptosi <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}
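
; Note: for f64 -> i32, AVX does have a packed instruction, so the 4f64
; cases collapse to a single vcvttpd2dq (the "y" suffix marks the ymm
; source form). The "# kill" line is a register-liveness annotation emitted
; when the xmm argument is implicitly widened to ymm, and vzeroupper is
; inserted before returning to avoid AVX-to-SSE transition penalties.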

define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Double to Unsigned Integer
;

define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    subsd %xmm2, %xmm1
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}
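
; Note: there is no f64 -> u64 truncating convert before AVX-512, so
; fptoui is lowered branchlessly around the signed convert. The memory
; constant loaded by movsd above is presumably 2^63; under that assumption
; each lane computes, in effect:
;
;   small = trunc_i64(x)                          ; valid while x < 2^63
;   big   = trunc_i64(x - 2^63) ^ 0x8000000000000000
;   res   = (x >= 2^63) ? big : small             ; ucomisd + cmovaeq
;
; The xor with the movabsq sign-bit constant re-adds the 2^63 that was
; subtracted to bring x into signed range.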

define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovbq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptoui <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-NEXT:    subsd %xmm3, %xmm0
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    subsd %xmm3, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm1, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}
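
; Note: in the AVX block above each f64 lane goes through the 64-bit form
; of vcvttsd2si and only the low 32 bits are kept by vpinsrd. That is sound
; because fptoui is undefined for inputs outside [0, 2^32), so every
; well-defined result already fits in the low half of the signed i64.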

;
; Float to Signed Integer
;

define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptosi <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}
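
; Note: f32 -> i32 is the one conversion with direct packed support, so no
; scalarization is needed: cvttps2dq converts four lanes per instruction,
; and the 8-element case is either split across two xmm ops (SSE) or done
; as a single ymm op (AVX). The f32 -> i64 cases in this section still
; scalarize through cvttss2si, exactly like their f64 counterparts.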

define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptosi <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Float to Unsigned Integer
;

define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}
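
; Note: there is no packed unsigned f32 -> i32 convert either, but since
; every well-defined fptoui result here fits in [0, 2^32), each lane can be
; converted with the 64-bit cvttss2si and only the low 32 bits kept: the
; SSE block reassembles the vector with movd/punpckldq, the AVX block with
; vmovd/vpinsrd.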

define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptoui <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}
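
; Note: f32 -> u64 reuses the same subtract/convert/xor/cmovae pattern as
; the f64 cases, just with the single-precision forms (subss, cvttss2si,
; ucomiss) and what is presumably a 2^63 float constant loaded by movss.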

define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm2, %xmm1, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rdx
; AVX-NEXT:    vucomiss %xmm2, %xmm1
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vsubss %xmm2, %xmm0, %xmm1
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm2, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rdx, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptoui <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Constant Folding
;

define <2 x i64> @fptosi_2f64_to_2i64_const() {
; SSE-LABEL: fptosi_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
  ret <2 x i64> %cvt
}
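
; Note: with constant operands the conversions fold at compile time and
; each function simply materializes the precomputed vector with movaps.
; The elements print as unsigned decimals, so negative signed results wrap:
; 18446744073709551615 above is the two's-complement encoding of -1.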

define <4 x i32> @fptosi_2f64_to_2i32_const() {
; SSE-LABEL: fptosi_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptosi_4f64_to_4i64_const() {
; SSE-LABEL: fptosi_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,18446744073709551613]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32_const() {
; SSE-LABEL: fptosi_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f64_to_2i64_const() {
; SSE-LABEL: fptoui_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <2,4,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4,6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i32> @fptosi_4f32_to_4i32_const() {
; SSE-LABEL: fptosi_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64_const() {
; SSE-LABEL: fptosi_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,4,6]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64_const() {
; SSE-LABEL: fptoui_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [4,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [8,6,4,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
  ret <8 x i32> %cvt
}
   1249