; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
;
; 32-bit tests to make sure we're not doing anything stupid.
; RUN: llc < %s -mtriple=i686-unknown-unknown
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2

;
; Double to Signed Integer
;
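; SSE2 and AVX1 have no packed f64 -> i64 conversion (cvttpd2dq only
; produces i32 results), so the i64 cases below expect each lane to go
; through scalar cvttsd2si and be reassembled with shuffles. A minimal
; per-lane sketch (illustrative names, not part of the test):
;
;   %e = extractelement <2 x double> %a, i32 0   ; shufpd/vpermilpd selects the lane
;   %c = fptosi double %e to i64                 ; one cvttsd2si per lane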

define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptosi <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Double to Unsigned Integer
;
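; Neither SSE2 nor AVX1 has an unsigned conversion instruction, so each
; fptoui lane is expected to lower to a compare-and-select around two
; signed conversions. A minimal scalar sketch of the pattern the checks
; below verify (0x43E0000000000000 is 2^63 as a double; the value names
; are illustrative, not part of the test):
;
;   %small = fptosi double %x to i64                  ; exact when %x < 2^63
;   %bias  = fsub double %x, 0x43E0000000000000       ; subsd: %x - 2^63
;   %shift = fptosi double %bias to i64
;   %big   = xor i64 %shift, -9223372036854775808     ; xorq: re-add the sign bit
;   %isbig = fcmp oge double %x, 0x43E0000000000000   ; ucomisd
;   %res   = select i1 %isbig, i64 %big, i64 %small   ; cmovaeq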

define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    subsd %xmm2, %xmm1
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovbq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptoui <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-NEXT:    subsd %xmm3, %xmm0
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    subsd %xmm3, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm1, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Float to Signed Integer
;
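; f32 -> i32 is the one case with a direct packed instruction, so those
; checks expect a single conversion per 128-bit half (illustrative sketch):
;
;   %cvt = fptosi <4 x float> %a to <4 x i32>   ; one cvttps2dq
;
; Widening to i64 again has no packed form on SSE2/AVX1 and goes
; lane-by-lane through scalar cvttss2si.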

define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptosi <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptosi <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Float to Unsigned Integer
;
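; fptoui f32 -> i32 can reuse the signed 64-bit converter, since every
; u32 value fits in the positive range of an i64; that is why the checks
; below pair cvttss2si into a 64-bit register with a 32-bit movd/vmovd.
; A minimal scalar sketch (illustrative names, not part of the test):
;
;   %wide = fptosi float %f to i64   ; cvttss2si %xmm, %rax
;   %res  = trunc i64 %wide to i32   ; keep %eax
;
; fptoui f32 -> i64 has no such headroom and uses the same
; subtract/xor/cmov pattern as the unsigned double cases above.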

define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptoui <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm2, %xmm1, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rdx
; AVX-NEXT:    vucomiss %xmm2, %xmm1
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vsubss %xmm2, %xmm0, %xmm1
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm2, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rdx, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptoui <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Constant Folding
;
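; These verify that conversions of constant vectors are folded at compile
; time into immediate loads. The values print as unsigned integers, so
; e.g. fptosi double -1.0 yields i64 -1, which appears below as
; 18446744073709551615 (2^64 - 1).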

define <2 x i64> @fptosi_2f64_to_2i64_const() {
; SSE-LABEL: fptosi_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32_const() {
; SSE-LABEL: fptosi_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptosi_4f64_to_4i64_const() {
; SSE-LABEL: fptosi_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,18446744073709551613]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32_const() {
; SSE-LABEL: fptosi_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f64_to_2i64_const() {
; SSE-LABEL: fptoui_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <2,4,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4,6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i32> @fptosi_4f32_to_4i32_const() {
; SSE-LABEL: fptosi_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64_const() {
; SSE-LABEL: fptosi_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,4,6]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64_const() {
; SSE-LABEL: fptoui_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [4,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [8,6,4,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
  ret <8 x i32> %cvt
}
   1248