Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc     -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_WIN,AVX512DQVL_32_WIN
      3 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu   -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_LIN,AVX512DQVL_32_LIN
      4 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc   -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_WIN,AVX512DQVL_64_WIN
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_LIN,AVX512DQVL_64_LIN
      6 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc     -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_WIN,AVX512DQ_32_WIN
      7 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu   -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_LIN,AVX512DQ_32_LIN
      8 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc   -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_WIN,AVX512DQ_64_WIN
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_LIN,AVX512DQ_64_LIN
     10 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc     -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_WIN,AVX512F_32_WIN
     11 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu   -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_LIN,AVX512F_32_LIN
     12 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc   -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_WIN,AVX512F_64_WIN
     13 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_LIN,AVX512F_64_LIN
     14 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc     -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X32,SSE3_32,SSE3_32_WIN
     15 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu   -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X32,SSE3_32,SSE3_32_LIN
     16 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc   -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X64,SSE3_64,SSE3_64_WIN
     17 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X64,SSE3_64,SSE3_64_LIN
     18 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc     -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X32,SSE2_32,SSE2_32_WIN
     19 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu   -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X32,SSE2_32,SSE2_32_LIN
     20 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc   -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE2_64,SSE2_64_WIN
     21 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE2_64,SSE2_64_LIN
     22 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc     -mattr=-sse  | FileCheck %s --check-prefixes=CHECK,X32,X87,X87_WIN
     23 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu   -mattr=-sse  | FileCheck %s --check-prefixes=CHECK,X32,X87,X87_LIN
     24 
     25 ; Check that scalar FP conversions to signed and unsigned int64 are using
     26 ; reasonable sequences, across platforms and target switches.
     27 ;
     28 ; The signed case is straightforward, and the tests here basically
     29 ; ensure successful compilation (f80 with avx512 was broken at one point).
     30 ;
     31 ; For the unsigned case there are many possible sequences, so to avoid
     32 ; a fragile test we just check for the presence of a few key instructions.
     33 ; AVX512 on Intel64 can use vcvtts[ds]2usi directly for float and double.
     34 ; Otherwise the sequence will involve an FP subtract (fsub, subss or subsd),
     35 ; and a truncating conversion (cvtts[ds]2si, fisttp, or fnstcw+fist).  When
     36 ; both a subtract and fnstcw are needed, they can occur in either order.
     37 ;
     38 ; The interesting subtargets are AVX512F (vcvtts[ds]2usi), SSE3 (fisttp),
     39 ; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only).
     40 
     41 define i64 @f_to_u64(float %a) nounwind {
     42 ; AVX512DQVL_32_WIN-LABEL: f_to_u64:
     43 ; AVX512DQVL_32_WIN:       # %bb.0:
     44 ; AVX512DQVL_32_WIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     45 ; AVX512DQVL_32_WIN-NEXT:    vcvttps2uqq %xmm0, %ymm0
     46 ; AVX512DQVL_32_WIN-NEXT:    vmovd %xmm0, %eax
     47 ; AVX512DQVL_32_WIN-NEXT:    vpextrd $1, %xmm0, %edx
     48 ; AVX512DQVL_32_WIN-NEXT:    vzeroupper
     49 ; AVX512DQVL_32_WIN-NEXT:    retl
     50 ;
     51 ; AVX512DQVL_32_LIN-LABEL: f_to_u64:
     52 ; AVX512DQVL_32_LIN:       # %bb.0:
     53 ; AVX512DQVL_32_LIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     54 ; AVX512DQVL_32_LIN-NEXT:    vcvttps2uqq %xmm0, %ymm0
     55 ; AVX512DQVL_32_LIN-NEXT:    vmovd %xmm0, %eax
     56 ; AVX512DQVL_32_LIN-NEXT:    vpextrd $1, %xmm0, %edx
     57 ; AVX512DQVL_32_LIN-NEXT:    vzeroupper
     58 ; AVX512DQVL_32_LIN-NEXT:    retl
     59 ;
     60 ; AVX512_64-LABEL: f_to_u64:
     61 ; AVX512_64:       # %bb.0:
     62 ; AVX512_64-NEXT:    vcvttss2usi %xmm0, %rax
     63 ; AVX512_64-NEXT:    retq
     64 ;
     65 ; AVX512DQ_32_WIN-LABEL: f_to_u64:
     66 ; AVX512DQ_32_WIN:       # %bb.0:
     67 ; AVX512DQ_32_WIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     68 ; AVX512DQ_32_WIN-NEXT:    vcvttps2uqq %ymm0, %zmm0
     69 ; AVX512DQ_32_WIN-NEXT:    vmovd %xmm0, %eax
     70 ; AVX512DQ_32_WIN-NEXT:    vpextrd $1, %xmm0, %edx
     71 ; AVX512DQ_32_WIN-NEXT:    vzeroupper
     72 ; AVX512DQ_32_WIN-NEXT:    retl
     73 ;
     74 ; AVX512DQ_32_LIN-LABEL: f_to_u64:
     75 ; AVX512DQ_32_LIN:       # %bb.0:
     76 ; AVX512DQ_32_LIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     77 ; AVX512DQ_32_LIN-NEXT:    vcvttps2uqq %ymm0, %zmm0
     78 ; AVX512DQ_32_LIN-NEXT:    vmovd %xmm0, %eax
     79 ; AVX512DQ_32_LIN-NEXT:    vpextrd $1, %xmm0, %edx
     80 ; AVX512DQ_32_LIN-NEXT:    vzeroupper
     81 ; AVX512DQ_32_LIN-NEXT:    retl
     82 ;
     83 ; AVX512F_32_WIN-LABEL: f_to_u64:
     84 ; AVX512F_32_WIN:       # %bb.0:
     85 ; AVX512F_32_WIN-NEXT:    pushl %ebp
     86 ; AVX512F_32_WIN-NEXT:    movl %esp, %ebp
     87 ; AVX512F_32_WIN-NEXT:    andl $-8, %esp
     88 ; AVX512F_32_WIN-NEXT:    subl $16, %esp
     89 ; AVX512F_32_WIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     90 ; AVX512F_32_WIN-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
     91 ; AVX512F_32_WIN-NEXT:    vcmpltss %xmm1, %xmm0, %k1
     92 ; AVX512F_32_WIN-NEXT:    vsubss %xmm1, %xmm0, %xmm2
     93 ; AVX512F_32_WIN-NEXT:    vmovss %xmm0, %xmm0, %xmm2 {%k1}
     94 ; AVX512F_32_WIN-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
     95 ; AVX512F_32_WIN-NEXT:    flds {{[0-9]+}}(%esp)
     96 ; AVX512F_32_WIN-NEXT:    fisttpll (%esp)
     97 ; AVX512F_32_WIN-NEXT:    xorl %edx, %edx
     98 ; AVX512F_32_WIN-NEXT:    vucomiss %xmm0, %xmm1
     99 ; AVX512F_32_WIN-NEXT:    setbe %dl
    100 ; AVX512F_32_WIN-NEXT:    shll $31, %edx
    101 ; AVX512F_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    102 ; AVX512F_32_WIN-NEXT:    movl (%esp), %eax
    103 ; AVX512F_32_WIN-NEXT:    movl %ebp, %esp
    104 ; AVX512F_32_WIN-NEXT:    popl %ebp
    105 ; AVX512F_32_WIN-NEXT:    retl
    106 ;
    107 ; AVX512F_32_LIN-LABEL: f_to_u64:
    108 ; AVX512F_32_LIN:       # %bb.0:
    109 ; AVX512F_32_LIN-NEXT:    subl $20, %esp
    110 ; AVX512F_32_LIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    111 ; AVX512F_32_LIN-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    112 ; AVX512F_32_LIN-NEXT:    vcmpltss %xmm1, %xmm0, %k1
    113 ; AVX512F_32_LIN-NEXT:    vsubss %xmm1, %xmm0, %xmm2
    114 ; AVX512F_32_LIN-NEXT:    vmovss %xmm0, %xmm0, %xmm2 {%k1}
    115 ; AVX512F_32_LIN-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
    116 ; AVX512F_32_LIN-NEXT:    flds {{[0-9]+}}(%esp)
    117 ; AVX512F_32_LIN-NEXT:    fisttpll (%esp)
    118 ; AVX512F_32_LIN-NEXT:    xorl %edx, %edx
    119 ; AVX512F_32_LIN-NEXT:    vucomiss %xmm0, %xmm1
    120 ; AVX512F_32_LIN-NEXT:    setbe %dl
    121 ; AVX512F_32_LIN-NEXT:    shll $31, %edx
    122 ; AVX512F_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    123 ; AVX512F_32_LIN-NEXT:    movl (%esp), %eax
    124 ; AVX512F_32_LIN-NEXT:    addl $20, %esp
    125 ; AVX512F_32_LIN-NEXT:    retl
    126 ;
    127 ; SSE3_32_WIN-LABEL: f_to_u64:
    128 ; SSE3_32_WIN:       # %bb.0:
    129 ; SSE3_32_WIN-NEXT:    pushl %ebp
    130 ; SSE3_32_WIN-NEXT:    movl %esp, %ebp
    131 ; SSE3_32_WIN-NEXT:    andl $-8, %esp
    132 ; SSE3_32_WIN-NEXT:    subl $16, %esp
    133 ; SSE3_32_WIN-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    134 ; SSE3_32_WIN-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    135 ; SSE3_32_WIN-NEXT:    movaps %xmm0, %xmm2
    136 ; SSE3_32_WIN-NEXT:    cmpltss %xmm1, %xmm2
    137 ; SSE3_32_WIN-NEXT:    movaps %xmm2, %xmm3
    138 ; SSE3_32_WIN-NEXT:    andps %xmm0, %xmm2
    139 ; SSE3_32_WIN-NEXT:    xorl %edx, %edx
    140 ; SSE3_32_WIN-NEXT:    ucomiss %xmm0, %xmm1
    141 ; SSE3_32_WIN-NEXT:    subss %xmm1, %xmm0
    142 ; SSE3_32_WIN-NEXT:    andnps %xmm0, %xmm3
    143 ; SSE3_32_WIN-NEXT:    orps %xmm3, %xmm2
    144 ; SSE3_32_WIN-NEXT:    movss %xmm2, {{[0-9]+}}(%esp)
    145 ; SSE3_32_WIN-NEXT:    flds {{[0-9]+}}(%esp)
    146 ; SSE3_32_WIN-NEXT:    fisttpll (%esp)
    147 ; SSE3_32_WIN-NEXT:    setbe %dl
    148 ; SSE3_32_WIN-NEXT:    shll $31, %edx
    149 ; SSE3_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    150 ; SSE3_32_WIN-NEXT:    movl (%esp), %eax
    151 ; SSE3_32_WIN-NEXT:    movl %ebp, %esp
    152 ; SSE3_32_WIN-NEXT:    popl %ebp
    153 ; SSE3_32_WIN-NEXT:    retl
    154 ;
    155 ; SSE3_32_LIN-LABEL: f_to_u64:
    156 ; SSE3_32_LIN:       # %bb.0:
    157 ; SSE3_32_LIN-NEXT:    subl $20, %esp
    158 ; SSE3_32_LIN-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    159 ; SSE3_32_LIN-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    160 ; SSE3_32_LIN-NEXT:    movaps %xmm0, %xmm2
    161 ; SSE3_32_LIN-NEXT:    cmpltss %xmm1, %xmm2
    162 ; SSE3_32_LIN-NEXT:    movaps %xmm2, %xmm3
    163 ; SSE3_32_LIN-NEXT:    andps %xmm0, %xmm2
    164 ; SSE3_32_LIN-NEXT:    xorl %edx, %edx
    165 ; SSE3_32_LIN-NEXT:    ucomiss %xmm0, %xmm1
    166 ; SSE3_32_LIN-NEXT:    subss %xmm1, %xmm0
    167 ; SSE3_32_LIN-NEXT:    andnps %xmm0, %xmm3
    168 ; SSE3_32_LIN-NEXT:    orps %xmm3, %xmm2
    169 ; SSE3_32_LIN-NEXT:    movss %xmm2, {{[0-9]+}}(%esp)
    170 ; SSE3_32_LIN-NEXT:    flds {{[0-9]+}}(%esp)
    171 ; SSE3_32_LIN-NEXT:    fisttpll (%esp)
    172 ; SSE3_32_LIN-NEXT:    setbe %dl
    173 ; SSE3_32_LIN-NEXT:    shll $31, %edx
    174 ; SSE3_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    175 ; SSE3_32_LIN-NEXT:    movl (%esp), %eax
    176 ; SSE3_32_LIN-NEXT:    addl $20, %esp
    177 ; SSE3_32_LIN-NEXT:    retl
    178 ;
    179 ; SSE3_64-LABEL: f_to_u64:
    180 ; SSE3_64:       # %bb.0:
    181 ; SSE3_64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    182 ; SSE3_64-NEXT:    movaps %xmm0, %xmm2
    183 ; SSE3_64-NEXT:    subss %xmm1, %xmm2
    184 ; SSE3_64-NEXT:    cvttss2si %xmm2, %rax
    185 ; SSE3_64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
    186 ; SSE3_64-NEXT:    xorq %rax, %rcx
    187 ; SSE3_64-NEXT:    cvttss2si %xmm0, %rax
    188 ; SSE3_64-NEXT:    ucomiss %xmm1, %xmm0
    189 ; SSE3_64-NEXT:    cmovaeq %rcx, %rax
    190 ; SSE3_64-NEXT:    retq
    191 ;
    192 ; SSE2_32_WIN-LABEL: f_to_u64:
    193 ; SSE2_32_WIN:       # %bb.0:
    194 ; SSE2_32_WIN-NEXT:    pushl %ebp
    195 ; SSE2_32_WIN-NEXT:    movl %esp, %ebp
    196 ; SSE2_32_WIN-NEXT:    andl $-8, %esp
    197 ; SSE2_32_WIN-NEXT:    subl $24, %esp
    198 ; SSE2_32_WIN-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    199 ; SSE2_32_WIN-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    200 ; SSE2_32_WIN-NEXT:    movaps %xmm0, %xmm2
    201 ; SSE2_32_WIN-NEXT:    cmpltss %xmm1, %xmm2
    202 ; SSE2_32_WIN-NEXT:    movaps %xmm2, %xmm3
    203 ; SSE2_32_WIN-NEXT:    andps %xmm0, %xmm2
    204 ; SSE2_32_WIN-NEXT:    xorl %edx, %edx
    205 ; SSE2_32_WIN-NEXT:    ucomiss %xmm0, %xmm1
    206 ; SSE2_32_WIN-NEXT:    subss %xmm1, %xmm0
    207 ; SSE2_32_WIN-NEXT:    andnps %xmm0, %xmm3
    208 ; SSE2_32_WIN-NEXT:    orps %xmm3, %xmm2
    209 ; SSE2_32_WIN-NEXT:    movss %xmm2, {{[0-9]+}}(%esp)
    210 ; SSE2_32_WIN-NEXT:    flds {{[0-9]+}}(%esp)
    211 ; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    212 ; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    213 ; SSE2_32_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    214 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    215 ; SSE2_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    216 ; SSE2_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    217 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    218 ; SSE2_32_WIN-NEXT:    setbe %dl
    219 ; SSE2_32_WIN-NEXT:    shll $31, %edx
    220 ; SSE2_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    221 ; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    222 ; SSE2_32_WIN-NEXT:    movl %ebp, %esp
    223 ; SSE2_32_WIN-NEXT:    popl %ebp
    224 ; SSE2_32_WIN-NEXT:    retl
    225 ;
    226 ; SSE2_32_LIN-LABEL: f_to_u64:
    227 ; SSE2_32_LIN:       # %bb.0:
    228 ; SSE2_32_LIN-NEXT:    subl $28, %esp
    229 ; SSE2_32_LIN-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    230 ; SSE2_32_LIN-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    231 ; SSE2_32_LIN-NEXT:    movaps %xmm0, %xmm2
    232 ; SSE2_32_LIN-NEXT:    cmpltss %xmm1, %xmm2
    233 ; SSE2_32_LIN-NEXT:    movaps %xmm2, %xmm3
    234 ; SSE2_32_LIN-NEXT:    andps %xmm0, %xmm2
    235 ; SSE2_32_LIN-NEXT:    xorl %edx, %edx
    236 ; SSE2_32_LIN-NEXT:    ucomiss %xmm0, %xmm1
    237 ; SSE2_32_LIN-NEXT:    subss %xmm1, %xmm0
    238 ; SSE2_32_LIN-NEXT:    andnps %xmm0, %xmm3
    239 ; SSE2_32_LIN-NEXT:    orps %xmm3, %xmm2
    240 ; SSE2_32_LIN-NEXT:    movss %xmm2, {{[0-9]+}}(%esp)
    241 ; SSE2_32_LIN-NEXT:    flds {{[0-9]+}}(%esp)
    242 ; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    243 ; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    244 ; SSE2_32_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    245 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    246 ; SSE2_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    247 ; SSE2_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    248 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    249 ; SSE2_32_LIN-NEXT:    setbe %dl
    250 ; SSE2_32_LIN-NEXT:    shll $31, %edx
    251 ; SSE2_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    252 ; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    253 ; SSE2_32_LIN-NEXT:    addl $28, %esp
    254 ; SSE2_32_LIN-NEXT:    retl
    255 ;
    256 ; SSE2_64-LABEL: f_to_u64:
    257 ; SSE2_64:       # %bb.0:
    258 ; SSE2_64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    259 ; SSE2_64-NEXT:    movaps %xmm0, %xmm2
    260 ; SSE2_64-NEXT:    subss %xmm1, %xmm2
    261 ; SSE2_64-NEXT:    cvttss2si %xmm2, %rax
    262 ; SSE2_64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
    263 ; SSE2_64-NEXT:    xorq %rax, %rcx
    264 ; SSE2_64-NEXT:    cvttss2si %xmm0, %rax
    265 ; SSE2_64-NEXT:    ucomiss %xmm1, %xmm0
    266 ; SSE2_64-NEXT:    cmovaeq %rcx, %rax
    267 ; SSE2_64-NEXT:    retq
    268 ;
    269 ; X87_WIN-LABEL: f_to_u64:
    270 ; X87_WIN:       # %bb.0:
    271 ; X87_WIN-NEXT:    pushl %ebp
    272 ; X87_WIN-NEXT:    movl %esp, %ebp
    273 ; X87_WIN-NEXT:    andl $-8, %esp
    274 ; X87_WIN-NEXT:    subl $16, %esp
    275 ; X87_WIN-NEXT:    flds 8(%ebp)
    276 ; X87_WIN-NEXT:    flds __real@5f000000
    277 ; X87_WIN-NEXT:    fld %st(1)
    278 ; X87_WIN-NEXT:    fsub %st(1)
    279 ; X87_WIN-NEXT:    fxch %st(1)
    280 ; X87_WIN-NEXT:    fucomp %st(2)
    281 ; X87_WIN-NEXT:    fnstsw %ax
    282 ; X87_WIN-NEXT:    # kill: def $ah killed $ah killed $ax
    283 ; X87_WIN-NEXT:    sahf
    284 ; X87_WIN-NEXT:    ja LBB0_2
    285 ; X87_WIN-NEXT:  # %bb.1:
    286 ; X87_WIN-NEXT:    fstp %st(1)
    287 ; X87_WIN-NEXT:    fldz
    288 ; X87_WIN-NEXT:  LBB0_2:
    289 ; X87_WIN-NEXT:    fstp %st(0)
    290 ; X87_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    291 ; X87_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    292 ; X87_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    293 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    294 ; X87_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    295 ; X87_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    296 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    297 ; X87_WIN-NEXT:    setbe %al
    298 ; X87_WIN-NEXT:    movzbl %al, %edx
    299 ; X87_WIN-NEXT:    shll $31, %edx
    300 ; X87_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    301 ; X87_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    302 ; X87_WIN-NEXT:    movl %ebp, %esp
    303 ; X87_WIN-NEXT:    popl %ebp
    304 ; X87_WIN-NEXT:    retl
    305 ;
    306 ; X87_LIN-LABEL: f_to_u64:
    307 ; X87_LIN:       # %bb.0:
    308 ; X87_LIN-NEXT:    subl $20, %esp
    309 ; X87_LIN-NEXT:    flds {{[0-9]+}}(%esp)
    310 ; X87_LIN-NEXT:    flds {{\.LCPI.*}}
    311 ; X87_LIN-NEXT:    fld %st(1)
    312 ; X87_LIN-NEXT:    fsub %st(1)
    313 ; X87_LIN-NEXT:    fxch %st(1)
    314 ; X87_LIN-NEXT:    fucomp %st(2)
    315 ; X87_LIN-NEXT:    fnstsw %ax
    316 ; X87_LIN-NEXT:    # kill: def $ah killed $ah killed $ax
    317 ; X87_LIN-NEXT:    sahf
    318 ; X87_LIN-NEXT:    ja .LBB0_2
    319 ; X87_LIN-NEXT:  # %bb.1:
    320 ; X87_LIN-NEXT:    fstp %st(1)
    321 ; X87_LIN-NEXT:    fldz
    322 ; X87_LIN-NEXT:  .LBB0_2:
    323 ; X87_LIN-NEXT:    fstp %st(0)
    324 ; X87_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    325 ; X87_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    326 ; X87_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    327 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    328 ; X87_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    329 ; X87_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    330 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    331 ; X87_LIN-NEXT:    setbe %al
    332 ; X87_LIN-NEXT:    movzbl %al, %edx
    333 ; X87_LIN-NEXT:    shll $31, %edx
    334 ; X87_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    335 ; X87_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    336 ; X87_LIN-NEXT:    addl $20, %esp
    337 ; X87_LIN-NEXT:    retl
        ;
        ; IR under test, float to unsigned i64 (fptoui). The assertion lines above
        ; are autogenerated (see the note at the top of this file); regenerate them
        ; with utils/update_llc_test_checks.py instead of editing them by hand.
        ; Expected lowering is a single vcvttss2usi on the 64-bit AVX512 runs and
        ; vcvttps2uqq on the 32-bit AVX512DQ runs; every other configuration
        ; expects an FP subtract combined with a truncating signed conversion
        ; (cvttss2si, fisttpll, or fnstcw/fldcw around fistpll).
    338   %r = fptoui float %a to i64
    339   ret i64 %r
    340 }
    341 
    342 define i64 @f_to_s64(float %a) nounwind {
    343 ; AVX512DQVL_32_WIN-LABEL: f_to_s64:
    344 ; AVX512DQVL_32_WIN:       # %bb.0:
    345 ; AVX512DQVL_32_WIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    346 ; AVX512DQVL_32_WIN-NEXT:    vcvttps2qq %xmm0, %ymm0
    347 ; AVX512DQVL_32_WIN-NEXT:    vmovd %xmm0, %eax
    348 ; AVX512DQVL_32_WIN-NEXT:    vpextrd $1, %xmm0, %edx
    349 ; AVX512DQVL_32_WIN-NEXT:    vzeroupper
    350 ; AVX512DQVL_32_WIN-NEXT:    retl
    351 ;
    352 ; AVX512DQVL_32_LIN-LABEL: f_to_s64:
    353 ; AVX512DQVL_32_LIN:       # %bb.0:
    354 ; AVX512DQVL_32_LIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    355 ; AVX512DQVL_32_LIN-NEXT:    vcvttps2qq %xmm0, %ymm0
    356 ; AVX512DQVL_32_LIN-NEXT:    vmovd %xmm0, %eax
    357 ; AVX512DQVL_32_LIN-NEXT:    vpextrd $1, %xmm0, %edx
    358 ; AVX512DQVL_32_LIN-NEXT:    vzeroupper
    359 ; AVX512DQVL_32_LIN-NEXT:    retl
    360 ;
    361 ; AVX512_64-LABEL: f_to_s64:
    362 ; AVX512_64:       # %bb.0:
    363 ; AVX512_64-NEXT:    vcvttss2si %xmm0, %rax
    364 ; AVX512_64-NEXT:    retq
    365 ;
    366 ; AVX512DQ_32_WIN-LABEL: f_to_s64:
    367 ; AVX512DQ_32_WIN:       # %bb.0:
    368 ; AVX512DQ_32_WIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    369 ; AVX512DQ_32_WIN-NEXT:    vcvttps2qq %ymm0, %zmm0
    370 ; AVX512DQ_32_WIN-NEXT:    vmovd %xmm0, %eax
    371 ; AVX512DQ_32_WIN-NEXT:    vpextrd $1, %xmm0, %edx
    372 ; AVX512DQ_32_WIN-NEXT:    vzeroupper
    373 ; AVX512DQ_32_WIN-NEXT:    retl
    374 ;
    375 ; AVX512DQ_32_LIN-LABEL: f_to_s64:
    376 ; AVX512DQ_32_LIN:       # %bb.0:
    377 ; AVX512DQ_32_LIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    378 ; AVX512DQ_32_LIN-NEXT:    vcvttps2qq %ymm0, %zmm0
    379 ; AVX512DQ_32_LIN-NEXT:    vmovd %xmm0, %eax
    380 ; AVX512DQ_32_LIN-NEXT:    vpextrd $1, %xmm0, %edx
    381 ; AVX512DQ_32_LIN-NEXT:    vzeroupper
    382 ; AVX512DQ_32_LIN-NEXT:    retl
    383 ;
    384 ; AVX512F_32_WIN-LABEL: f_to_s64:
    385 ; AVX512F_32_WIN:       # %bb.0:
    386 ; AVX512F_32_WIN-NEXT:    pushl %ebp
    387 ; AVX512F_32_WIN-NEXT:    movl %esp, %ebp
    388 ; AVX512F_32_WIN-NEXT:    andl $-8, %esp
    389 ; AVX512F_32_WIN-NEXT:    subl $16, %esp
    390 ; AVX512F_32_WIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    391 ; AVX512F_32_WIN-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
    392 ; AVX512F_32_WIN-NEXT:    flds {{[0-9]+}}(%esp)
    393 ; AVX512F_32_WIN-NEXT:    fisttpll (%esp)
    394 ; AVX512F_32_WIN-NEXT:    movl (%esp), %eax
    395 ; AVX512F_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    396 ; AVX512F_32_WIN-NEXT:    movl %ebp, %esp
    397 ; AVX512F_32_WIN-NEXT:    popl %ebp
    398 ; AVX512F_32_WIN-NEXT:    retl
    399 ;
    400 ; AVX512F_32_LIN-LABEL: f_to_s64:
    401 ; AVX512F_32_LIN:       # %bb.0:
    402 ; AVX512F_32_LIN-NEXT:    subl $20, %esp
    403 ; AVX512F_32_LIN-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    404 ; AVX512F_32_LIN-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
    405 ; AVX512F_32_LIN-NEXT:    flds {{[0-9]+}}(%esp)
    406 ; AVX512F_32_LIN-NEXT:    fisttpll (%esp)
    407 ; AVX512F_32_LIN-NEXT:    movl (%esp), %eax
    408 ; AVX512F_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    409 ; AVX512F_32_LIN-NEXT:    addl $20, %esp
    410 ; AVX512F_32_LIN-NEXT:    retl
    411 ;
    412 ; SSE3_32_WIN-LABEL: f_to_s64:
    413 ; SSE3_32_WIN:       # %bb.0:
    414 ; SSE3_32_WIN-NEXT:    pushl %ebp
    415 ; SSE3_32_WIN-NEXT:    movl %esp, %ebp
    416 ; SSE3_32_WIN-NEXT:    andl $-8, %esp
    417 ; SSE3_32_WIN-NEXT:    subl $16, %esp
    418 ; SSE3_32_WIN-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    419 ; SSE3_32_WIN-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
    420 ; SSE3_32_WIN-NEXT:    flds {{[0-9]+}}(%esp)
    421 ; SSE3_32_WIN-NEXT:    fisttpll (%esp)
    422 ; SSE3_32_WIN-NEXT:    movl (%esp), %eax
    423 ; SSE3_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    424 ; SSE3_32_WIN-NEXT:    movl %ebp, %esp
    425 ; SSE3_32_WIN-NEXT:    popl %ebp
    426 ; SSE3_32_WIN-NEXT:    retl
    427 ;
    428 ; SSE3_32_LIN-LABEL: f_to_s64:
    429 ; SSE3_32_LIN:       # %bb.0:
    430 ; SSE3_32_LIN-NEXT:    subl $20, %esp
    431 ; SSE3_32_LIN-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    432 ; SSE3_32_LIN-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
    433 ; SSE3_32_LIN-NEXT:    flds {{[0-9]+}}(%esp)
    434 ; SSE3_32_LIN-NEXT:    fisttpll (%esp)
    435 ; SSE3_32_LIN-NEXT:    movl (%esp), %eax
    436 ; SSE3_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    437 ; SSE3_32_LIN-NEXT:    addl $20, %esp
    438 ; SSE3_32_LIN-NEXT:    retl
    439 ;
    440 ; SSE3_64-LABEL: f_to_s64:
    441 ; SSE3_64:       # %bb.0:
    442 ; SSE3_64-NEXT:    cvttss2si %xmm0, %rax
    443 ; SSE3_64-NEXT:    retq
    444 ;
    445 ; SSE2_32_WIN-LABEL: f_to_s64:
    446 ; SSE2_32_WIN:       # %bb.0:
    447 ; SSE2_32_WIN-NEXT:    pushl %ebp
    448 ; SSE2_32_WIN-NEXT:    movl %esp, %ebp
    449 ; SSE2_32_WIN-NEXT:    andl $-8, %esp
    450 ; SSE2_32_WIN-NEXT:    subl $24, %esp
    451 ; SSE2_32_WIN-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    452 ; SSE2_32_WIN-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
    453 ; SSE2_32_WIN-NEXT:    flds {{[0-9]+}}(%esp)
    454 ; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    455 ; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    456 ; SSE2_32_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    457 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    458 ; SSE2_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    459 ; SSE2_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    460 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    461 ; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    462 ; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    463 ; SSE2_32_WIN-NEXT:    movl %ebp, %esp
    464 ; SSE2_32_WIN-NEXT:    popl %ebp
    465 ; SSE2_32_WIN-NEXT:    retl
    466 ;
    467 ; SSE2_32_LIN-LABEL: f_to_s64:
    468 ; SSE2_32_LIN:       # %bb.0:
    469 ; SSE2_32_LIN-NEXT:    subl $28, %esp
    470 ; SSE2_32_LIN-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    471 ; SSE2_32_LIN-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
    472 ; SSE2_32_LIN-NEXT:    flds {{[0-9]+}}(%esp)
    473 ; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    474 ; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    475 ; SSE2_32_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    476 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    477 ; SSE2_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    478 ; SSE2_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    479 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    480 ; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    481 ; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    482 ; SSE2_32_LIN-NEXT:    addl $28, %esp
    483 ; SSE2_32_LIN-NEXT:    retl
    484 ;
    485 ; SSE2_64-LABEL: f_to_s64:
    486 ; SSE2_64:       # %bb.0:
    487 ; SSE2_64-NEXT:    cvttss2si %xmm0, %rax
    488 ; SSE2_64-NEXT:    retq
    489 ;
    490 ; X87_WIN-LABEL: f_to_s64:
    491 ; X87_WIN:       # %bb.0:
    492 ; X87_WIN-NEXT:    pushl %ebp
    493 ; X87_WIN-NEXT:    movl %esp, %ebp
    494 ; X87_WIN-NEXT:    andl $-8, %esp
    495 ; X87_WIN-NEXT:    subl $16, %esp
    496 ; X87_WIN-NEXT:    flds 8(%ebp)
    497 ; X87_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    498 ; X87_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    499 ; X87_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    500 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    501 ; X87_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    502 ; X87_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    503 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    504 ; X87_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    505 ; X87_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    506 ; X87_WIN-NEXT:    movl %ebp, %esp
    507 ; X87_WIN-NEXT:    popl %ebp
    508 ; X87_WIN-NEXT:    retl
    509 ;
    510 ; X87_LIN-LABEL: f_to_s64:
    511 ; X87_LIN:       # %bb.0:
    512 ; X87_LIN-NEXT:    subl $20, %esp
    513 ; X87_LIN-NEXT:    flds {{[0-9]+}}(%esp)
    514 ; X87_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    515 ; X87_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    516 ; X87_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    517 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    518 ; X87_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    519 ; X87_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    520 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    521 ; X87_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    522 ; X87_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    523 ; X87_LIN-NEXT:    addl $20, %esp
    524 ; X87_LIN-NEXT:    retl
        ;
        ; IR under test, float to signed i64 (fptosi). The assertion lines above
        ; are autogenerated (see the note at the top of this file); regenerate them
        ; with utils/update_llc_test_checks.py instead of editing them by hand.
        ; Expected lowering is a single (v)cvttss2si on every 64-bit run and
        ; vcvttps2qq on the 32-bit AVX512DQ runs; the 32-bit AVX512F and SSE3 runs
        ; go through the x87 stack with fisttpll, while the 32-bit SSE2 and plain
        ; x87 runs save and restore the x87 control word (fnstcw/fldcw) around
        ; fistpll.
    525   %r = fptosi float %a to i64
    526   ret i64 %r
    527 }
    528 
    529 define i64 @d_to_u64(double %a) nounwind {
    530 ; AVX512DQVL_32_WIN-LABEL: d_to_u64:
    531 ; AVX512DQVL_32_WIN:       # %bb.0:
    532 ; AVX512DQVL_32_WIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    533 ; AVX512DQVL_32_WIN-NEXT:    vcvttpd2uqq %ymm0, %ymm0
    534 ; AVX512DQVL_32_WIN-NEXT:    vmovd %xmm0, %eax
    535 ; AVX512DQVL_32_WIN-NEXT:    vpextrd $1, %xmm0, %edx
    536 ; AVX512DQVL_32_WIN-NEXT:    vzeroupper
    537 ; AVX512DQVL_32_WIN-NEXT:    retl
    538 ;
    539 ; AVX512DQVL_32_LIN-LABEL: d_to_u64:
    540 ; AVX512DQVL_32_LIN:       # %bb.0:
    541 ; AVX512DQVL_32_LIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    542 ; AVX512DQVL_32_LIN-NEXT:    vcvttpd2uqq %ymm0, %ymm0
    543 ; AVX512DQVL_32_LIN-NEXT:    vmovd %xmm0, %eax
    544 ; AVX512DQVL_32_LIN-NEXT:    vpextrd $1, %xmm0, %edx
    545 ; AVX512DQVL_32_LIN-NEXT:    vzeroupper
    546 ; AVX512DQVL_32_LIN-NEXT:    retl
    547 ;
    548 ; AVX512_64-LABEL: d_to_u64:
    549 ; AVX512_64:       # %bb.0:
    550 ; AVX512_64-NEXT:    vcvttsd2usi %xmm0, %rax
    551 ; AVX512_64-NEXT:    retq
    552 ;
    553 ; AVX512DQ_32_WIN-LABEL: d_to_u64:
    554 ; AVX512DQ_32_WIN:       # %bb.0:
    555 ; AVX512DQ_32_WIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    556 ; AVX512DQ_32_WIN-NEXT:    vcvttpd2uqq %zmm0, %zmm0
    557 ; AVX512DQ_32_WIN-NEXT:    vmovd %xmm0, %eax
    558 ; AVX512DQ_32_WIN-NEXT:    vpextrd $1, %xmm0, %edx
    559 ; AVX512DQ_32_WIN-NEXT:    vzeroupper
    560 ; AVX512DQ_32_WIN-NEXT:    retl
    561 ;
    562 ; AVX512DQ_32_LIN-LABEL: d_to_u64:
    563 ; AVX512DQ_32_LIN:       # %bb.0:
    564 ; AVX512DQ_32_LIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    565 ; AVX512DQ_32_LIN-NEXT:    vcvttpd2uqq %zmm0, %zmm0
    566 ; AVX512DQ_32_LIN-NEXT:    vmovd %xmm0, %eax
    567 ; AVX512DQ_32_LIN-NEXT:    vpextrd $1, %xmm0, %edx
    568 ; AVX512DQ_32_LIN-NEXT:    vzeroupper
    569 ; AVX512DQ_32_LIN-NEXT:    retl
    570 ;
    571 ; AVX512F_32_WIN-LABEL: d_to_u64:
    572 ; AVX512F_32_WIN:       # %bb.0:
    573 ; AVX512F_32_WIN-NEXT:    pushl %ebp
    574 ; AVX512F_32_WIN-NEXT:    movl %esp, %ebp
    575 ; AVX512F_32_WIN-NEXT:    andl $-8, %esp
    576 ; AVX512F_32_WIN-NEXT:    subl $16, %esp
    577 ; AVX512F_32_WIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    578 ; AVX512F_32_WIN-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    579 ; AVX512F_32_WIN-NEXT:    vcmpltsd %xmm1, %xmm0, %k1
    580 ; AVX512F_32_WIN-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
    581 ; AVX512F_32_WIN-NEXT:    vmovsd %xmm0, %xmm0, %xmm2 {%k1}
    582 ; AVX512F_32_WIN-NEXT:    vmovsd %xmm2, {{[0-9]+}}(%esp)
    583 ; AVX512F_32_WIN-NEXT:    fldl {{[0-9]+}}(%esp)
    584 ; AVX512F_32_WIN-NEXT:    fisttpll (%esp)
    585 ; AVX512F_32_WIN-NEXT:    xorl %edx, %edx
    586 ; AVX512F_32_WIN-NEXT:    vucomisd %xmm0, %xmm1
    587 ; AVX512F_32_WIN-NEXT:    setbe %dl
    588 ; AVX512F_32_WIN-NEXT:    shll $31, %edx
    589 ; AVX512F_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    590 ; AVX512F_32_WIN-NEXT:    movl (%esp), %eax
    591 ; AVX512F_32_WIN-NEXT:    movl %ebp, %esp
    592 ; AVX512F_32_WIN-NEXT:    popl %ebp
    593 ; AVX512F_32_WIN-NEXT:    retl
    594 ;
    595 ; AVX512F_32_LIN-LABEL: d_to_u64:
    596 ; AVX512F_32_LIN:       # %bb.0:
    597 ; AVX512F_32_LIN-NEXT:    subl $20, %esp
    598 ; AVX512F_32_LIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    599 ; AVX512F_32_LIN-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    600 ; AVX512F_32_LIN-NEXT:    vcmpltsd %xmm1, %xmm0, %k1
    601 ; AVX512F_32_LIN-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
    602 ; AVX512F_32_LIN-NEXT:    vmovsd %xmm0, %xmm0, %xmm2 {%k1}
    603 ; AVX512F_32_LIN-NEXT:    vmovsd %xmm2, {{[0-9]+}}(%esp)
    604 ; AVX512F_32_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
    605 ; AVX512F_32_LIN-NEXT:    fisttpll (%esp)
    606 ; AVX512F_32_LIN-NEXT:    xorl %edx, %edx
    607 ; AVX512F_32_LIN-NEXT:    vucomisd %xmm0, %xmm1
    608 ; AVX512F_32_LIN-NEXT:    setbe %dl
    609 ; AVX512F_32_LIN-NEXT:    shll $31, %edx
    610 ; AVX512F_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    611 ; AVX512F_32_LIN-NEXT:    movl (%esp), %eax
    612 ; AVX512F_32_LIN-NEXT:    addl $20, %esp
    613 ; AVX512F_32_LIN-NEXT:    retl
    614 ;
    615 ; SSE3_32_WIN-LABEL: d_to_u64:
    616 ; SSE3_32_WIN:       # %bb.0:
    617 ; SSE3_32_WIN-NEXT:    pushl %ebp
    618 ; SSE3_32_WIN-NEXT:    movl %esp, %ebp
    619 ; SSE3_32_WIN-NEXT:    andl $-8, %esp
    620 ; SSE3_32_WIN-NEXT:    subl $16, %esp
    621 ; SSE3_32_WIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    622 ; SSE3_32_WIN-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    623 ; SSE3_32_WIN-NEXT:    movapd %xmm0, %xmm2
    624 ; SSE3_32_WIN-NEXT:    cmpltsd %xmm1, %xmm2
    625 ; SSE3_32_WIN-NEXT:    movapd %xmm2, %xmm3
    626 ; SSE3_32_WIN-NEXT:    andpd %xmm0, %xmm2
    627 ; SSE3_32_WIN-NEXT:    xorl %edx, %edx
    628 ; SSE3_32_WIN-NEXT:    ucomisd %xmm0, %xmm1
    629 ; SSE3_32_WIN-NEXT:    subsd %xmm1, %xmm0
    630 ; SSE3_32_WIN-NEXT:    andnpd %xmm0, %xmm3
    631 ; SSE3_32_WIN-NEXT:    orpd %xmm3, %xmm2
    632 ; SSE3_32_WIN-NEXT:    movsd %xmm2, {{[0-9]+}}(%esp)
    633 ; SSE3_32_WIN-NEXT:    fldl {{[0-9]+}}(%esp)
    634 ; SSE3_32_WIN-NEXT:    fisttpll (%esp)
    635 ; SSE3_32_WIN-NEXT:    setbe %dl
    636 ; SSE3_32_WIN-NEXT:    shll $31, %edx
    637 ; SSE3_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    638 ; SSE3_32_WIN-NEXT:    movl (%esp), %eax
    639 ; SSE3_32_WIN-NEXT:    movl %ebp, %esp
    640 ; SSE3_32_WIN-NEXT:    popl %ebp
    641 ; SSE3_32_WIN-NEXT:    retl
    642 ;
    643 ; SSE3_32_LIN-LABEL: d_to_u64:
    644 ; SSE3_32_LIN:       # %bb.0:
    645 ; SSE3_32_LIN-NEXT:    subl $20, %esp
    646 ; SSE3_32_LIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    647 ; SSE3_32_LIN-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    648 ; SSE3_32_LIN-NEXT:    movapd %xmm0, %xmm2
    649 ; SSE3_32_LIN-NEXT:    cmpltsd %xmm1, %xmm2
    650 ; SSE3_32_LIN-NEXT:    movapd %xmm2, %xmm3
    651 ; SSE3_32_LIN-NEXT:    andpd %xmm0, %xmm2
    652 ; SSE3_32_LIN-NEXT:    xorl %edx, %edx
    653 ; SSE3_32_LIN-NEXT:    ucomisd %xmm0, %xmm1
    654 ; SSE3_32_LIN-NEXT:    subsd %xmm1, %xmm0
    655 ; SSE3_32_LIN-NEXT:    andnpd %xmm0, %xmm3
    656 ; SSE3_32_LIN-NEXT:    orpd %xmm3, %xmm2
    657 ; SSE3_32_LIN-NEXT:    movsd %xmm2, {{[0-9]+}}(%esp)
    658 ; SSE3_32_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
    659 ; SSE3_32_LIN-NEXT:    fisttpll (%esp)
    660 ; SSE3_32_LIN-NEXT:    setbe %dl
    661 ; SSE3_32_LIN-NEXT:    shll $31, %edx
    662 ; SSE3_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    663 ; SSE3_32_LIN-NEXT:    movl (%esp), %eax
    664 ; SSE3_32_LIN-NEXT:    addl $20, %esp
    665 ; SSE3_32_LIN-NEXT:    retl
    666 ;
    667 ; SSE3_64-LABEL: d_to_u64:
    668 ; SSE3_64:       # %bb.0:
    669 ; SSE3_64-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    670 ; SSE3_64-NEXT:    movapd %xmm0, %xmm2
    671 ; SSE3_64-NEXT:    subsd %xmm1, %xmm2
    672 ; SSE3_64-NEXT:    cvttsd2si %xmm2, %rax
    673 ; SSE3_64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
    674 ; SSE3_64-NEXT:    xorq %rax, %rcx
    675 ; SSE3_64-NEXT:    cvttsd2si %xmm0, %rax
    676 ; SSE3_64-NEXT:    ucomisd %xmm1, %xmm0
    677 ; SSE3_64-NEXT:    cmovaeq %rcx, %rax
    678 ; SSE3_64-NEXT:    retq
    679 ;
    680 ; SSE2_32_WIN-LABEL: d_to_u64:
    681 ; SSE2_32_WIN:       # %bb.0:
    682 ; SSE2_32_WIN-NEXT:    pushl %ebp
    683 ; SSE2_32_WIN-NEXT:    movl %esp, %ebp
    684 ; SSE2_32_WIN-NEXT:    andl $-8, %esp
    685 ; SSE2_32_WIN-NEXT:    subl $24, %esp
    686 ; SSE2_32_WIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    687 ; SSE2_32_WIN-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    688 ; SSE2_32_WIN-NEXT:    movapd %xmm0, %xmm2
    689 ; SSE2_32_WIN-NEXT:    cmpltsd %xmm1, %xmm2
    690 ; SSE2_32_WIN-NEXT:    movapd %xmm2, %xmm3
    691 ; SSE2_32_WIN-NEXT:    andpd %xmm0, %xmm2
    692 ; SSE2_32_WIN-NEXT:    xorl %edx, %edx
    693 ; SSE2_32_WIN-NEXT:    ucomisd %xmm0, %xmm1
    694 ; SSE2_32_WIN-NEXT:    subsd %xmm1, %xmm0
    695 ; SSE2_32_WIN-NEXT:    andnpd %xmm0, %xmm3
    696 ; SSE2_32_WIN-NEXT:    orpd %xmm3, %xmm2
    697 ; SSE2_32_WIN-NEXT:    movsd %xmm2, {{[0-9]+}}(%esp)
    698 ; SSE2_32_WIN-NEXT:    fldl {{[0-9]+}}(%esp)
    699 ; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    700 ; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    701 ; SSE2_32_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    702 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    703 ; SSE2_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    704 ; SSE2_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    705 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    706 ; SSE2_32_WIN-NEXT:    setbe %dl
    707 ; SSE2_32_WIN-NEXT:    shll $31, %edx
    708 ; SSE2_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    709 ; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    710 ; SSE2_32_WIN-NEXT:    movl %ebp, %esp
    711 ; SSE2_32_WIN-NEXT:    popl %ebp
    712 ; SSE2_32_WIN-NEXT:    retl
    713 ;
    714 ; SSE2_32_LIN-LABEL: d_to_u64:
    715 ; SSE2_32_LIN:       # %bb.0:
    716 ; SSE2_32_LIN-NEXT:    subl $28, %esp
    717 ; SSE2_32_LIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    718 ; SSE2_32_LIN-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    719 ; SSE2_32_LIN-NEXT:    movapd %xmm0, %xmm2
    720 ; SSE2_32_LIN-NEXT:    cmpltsd %xmm1, %xmm2
    721 ; SSE2_32_LIN-NEXT:    movapd %xmm2, %xmm3
    722 ; SSE2_32_LIN-NEXT:    andpd %xmm0, %xmm2
    723 ; SSE2_32_LIN-NEXT:    xorl %edx, %edx
    724 ; SSE2_32_LIN-NEXT:    ucomisd %xmm0, %xmm1
    725 ; SSE2_32_LIN-NEXT:    subsd %xmm1, %xmm0
    726 ; SSE2_32_LIN-NEXT:    andnpd %xmm0, %xmm3
    727 ; SSE2_32_LIN-NEXT:    orpd %xmm3, %xmm2
    728 ; SSE2_32_LIN-NEXT:    movsd %xmm2, {{[0-9]+}}(%esp)
    729 ; SSE2_32_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
    730 ; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    731 ; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    732 ; SSE2_32_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    733 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    734 ; SSE2_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    735 ; SSE2_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    736 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    737 ; SSE2_32_LIN-NEXT:    setbe %dl
    738 ; SSE2_32_LIN-NEXT:    shll $31, %edx
    739 ; SSE2_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    740 ; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    741 ; SSE2_32_LIN-NEXT:    addl $28, %esp
    742 ; SSE2_32_LIN-NEXT:    retl
    743 ;
    744 ; SSE2_64-LABEL: d_to_u64:
    745 ; SSE2_64:       # %bb.0:
    746 ; SSE2_64-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    747 ; SSE2_64-NEXT:    movapd %xmm0, %xmm2
    748 ; SSE2_64-NEXT:    subsd %xmm1, %xmm2
    749 ; SSE2_64-NEXT:    cvttsd2si %xmm2, %rax
    750 ; SSE2_64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
    751 ; SSE2_64-NEXT:    xorq %rax, %rcx
    752 ; SSE2_64-NEXT:    cvttsd2si %xmm0, %rax
    753 ; SSE2_64-NEXT:    ucomisd %xmm1, %xmm0
    754 ; SSE2_64-NEXT:    cmovaeq %rcx, %rax
    755 ; SSE2_64-NEXT:    retq
    756 ;
    757 ; X87_WIN-LABEL: d_to_u64:
    758 ; X87_WIN:       # %bb.0:
    759 ; X87_WIN-NEXT:    pushl %ebp
    760 ; X87_WIN-NEXT:    movl %esp, %ebp
    761 ; X87_WIN-NEXT:    andl $-8, %esp
    762 ; X87_WIN-NEXT:    subl $16, %esp
    763 ; X87_WIN-NEXT:    fldl 8(%ebp)
    764 ; X87_WIN-NEXT:    flds __real@5f000000
    765 ; X87_WIN-NEXT:    fld %st(1)
    766 ; X87_WIN-NEXT:    fsub %st(1)
    767 ; X87_WIN-NEXT:    fxch %st(1)
    768 ; X87_WIN-NEXT:    fucomp %st(2)
    769 ; X87_WIN-NEXT:    fnstsw %ax
    770 ; X87_WIN-NEXT:    # kill: def $ah killed $ah killed $ax
    771 ; X87_WIN-NEXT:    sahf
    772 ; X87_WIN-NEXT:    ja LBB2_2
    773 ; X87_WIN-NEXT:  # %bb.1:
    774 ; X87_WIN-NEXT:    fstp %st(1)
    775 ; X87_WIN-NEXT:    fldz
    776 ; X87_WIN-NEXT:  LBB2_2:
    777 ; X87_WIN-NEXT:    fstp %st(0)
    778 ; X87_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    779 ; X87_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    780 ; X87_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    781 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    782 ; X87_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    783 ; X87_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    784 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    785 ; X87_WIN-NEXT:    setbe %al
    786 ; X87_WIN-NEXT:    movzbl %al, %edx
    787 ; X87_WIN-NEXT:    shll $31, %edx
    788 ; X87_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    789 ; X87_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    790 ; X87_WIN-NEXT:    movl %ebp, %esp
    791 ; X87_WIN-NEXT:    popl %ebp
    792 ; X87_WIN-NEXT:    retl
    793 ;
    794 ; X87_LIN-LABEL: d_to_u64:
    795 ; X87_LIN:       # %bb.0:
    796 ; X87_LIN-NEXT:    subl $20, %esp
    797 ; X87_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
    798 ; X87_LIN-NEXT:    flds {{\.LCPI.*}}
    799 ; X87_LIN-NEXT:    fld %st(1)
    800 ; X87_LIN-NEXT:    fsub %st(1)
    801 ; X87_LIN-NEXT:    fxch %st(1)
    802 ; X87_LIN-NEXT:    fucomp %st(2)
    803 ; X87_LIN-NEXT:    fnstsw %ax
    804 ; X87_LIN-NEXT:    # kill: def $ah killed $ah killed $ax
    805 ; X87_LIN-NEXT:    sahf
    806 ; X87_LIN-NEXT:    ja .LBB2_2
    807 ; X87_LIN-NEXT:  # %bb.1:
    808 ; X87_LIN-NEXT:    fstp %st(1)
    809 ; X87_LIN-NEXT:    fldz
    810 ; X87_LIN-NEXT:  .LBB2_2:
    811 ; X87_LIN-NEXT:    fstp %st(0)
    812 ; X87_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    813 ; X87_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    814 ; X87_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    815 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    816 ; X87_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    817 ; X87_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    818 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    819 ; X87_LIN-NEXT:    setbe %al
    820 ; X87_LIN-NEXT:    movzbl %al, %edx
    821 ; X87_LIN-NEXT:    shll $31, %edx
    822 ; X87_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
    823 ; X87_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    824 ; X87_LIN-NEXT:    addl $20, %esp
    825 ; X87_LIN-NEXT:    retl
    826   %r = fptoui double %a to i64
    827   ret i64 %r
    828 }
    829 
    830 define i64 @d_to_s64(double %a) nounwind { ; signed double to i64 conversion; the assertions below pin the expected lowering for each RUN configuration
    831 ; AVX512DQVL_32_WIN-LABEL: d_to_s64:
    832 ; AVX512DQVL_32_WIN:       # %bb.0:
    833 ; AVX512DQVL_32_WIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    834 ; AVX512DQVL_32_WIN-NEXT:    vcvttpd2qq %ymm0, %ymm0
    835 ; AVX512DQVL_32_WIN-NEXT:    vmovd %xmm0, %eax
    836 ; AVX512DQVL_32_WIN-NEXT:    vpextrd $1, %xmm0, %edx
    837 ; AVX512DQVL_32_WIN-NEXT:    vzeroupper
    838 ; AVX512DQVL_32_WIN-NEXT:    retl
    839 ;
    840 ; AVX512DQVL_32_LIN-LABEL: d_to_s64:
    841 ; AVX512DQVL_32_LIN:       # %bb.0:
    842 ; AVX512DQVL_32_LIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    843 ; AVX512DQVL_32_LIN-NEXT:    vcvttpd2qq %ymm0, %ymm0
    844 ; AVX512DQVL_32_LIN-NEXT:    vmovd %xmm0, %eax
    845 ; AVX512DQVL_32_LIN-NEXT:    vpextrd $1, %xmm0, %edx
    846 ; AVX512DQVL_32_LIN-NEXT:    vzeroupper
    847 ; AVX512DQVL_32_LIN-NEXT:    retl
    848 ;
    849 ; AVX512_64-LABEL: d_to_s64:
    850 ; AVX512_64:       # %bb.0:
    851 ; AVX512_64-NEXT:    vcvttsd2si %xmm0, %rax
    852 ; AVX512_64-NEXT:    retq
    853 ;
    854 ; AVX512DQ_32_WIN-LABEL: d_to_s64:
    855 ; AVX512DQ_32_WIN:       # %bb.0:
    856 ; AVX512DQ_32_WIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    857 ; AVX512DQ_32_WIN-NEXT:    vcvttpd2qq %zmm0, %zmm0
    858 ; AVX512DQ_32_WIN-NEXT:    vmovd %xmm0, %eax
    859 ; AVX512DQ_32_WIN-NEXT:    vpextrd $1, %xmm0, %edx
    860 ; AVX512DQ_32_WIN-NEXT:    vzeroupper
    861 ; AVX512DQ_32_WIN-NEXT:    retl
    862 ;
    863 ; AVX512DQ_32_LIN-LABEL: d_to_s64:
    864 ; AVX512DQ_32_LIN:       # %bb.0:
    865 ; AVX512DQ_32_LIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    866 ; AVX512DQ_32_LIN-NEXT:    vcvttpd2qq %zmm0, %zmm0
    867 ; AVX512DQ_32_LIN-NEXT:    vmovd %xmm0, %eax
    868 ; AVX512DQ_32_LIN-NEXT:    vpextrd $1, %xmm0, %edx
    869 ; AVX512DQ_32_LIN-NEXT:    vzeroupper
    870 ; AVX512DQ_32_LIN-NEXT:    retl
    871 ;
    872 ; AVX512F_32_WIN-LABEL: d_to_s64:
    873 ; AVX512F_32_WIN:       # %bb.0:
    874 ; AVX512F_32_WIN-NEXT:    pushl %ebp
    875 ; AVX512F_32_WIN-NEXT:    movl %esp, %ebp
    876 ; AVX512F_32_WIN-NEXT:    andl $-8, %esp
    877 ; AVX512F_32_WIN-NEXT:    subl $16, %esp
    878 ; AVX512F_32_WIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    879 ; AVX512F_32_WIN-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
    880 ; AVX512F_32_WIN-NEXT:    fldl {{[0-9]+}}(%esp)
    881 ; AVX512F_32_WIN-NEXT:    fisttpll (%esp)
    882 ; AVX512F_32_WIN-NEXT:    movl (%esp), %eax
    883 ; AVX512F_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    884 ; AVX512F_32_WIN-NEXT:    movl %ebp, %esp
    885 ; AVX512F_32_WIN-NEXT:    popl %ebp
    886 ; AVX512F_32_WIN-NEXT:    retl
    887 ;
    888 ; AVX512F_32_LIN-LABEL: d_to_s64:
    889 ; AVX512F_32_LIN:       # %bb.0:
    890 ; AVX512F_32_LIN-NEXT:    subl $20, %esp
    891 ; AVX512F_32_LIN-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    892 ; AVX512F_32_LIN-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
    893 ; AVX512F_32_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
    894 ; AVX512F_32_LIN-NEXT:    fisttpll (%esp)
    895 ; AVX512F_32_LIN-NEXT:    movl (%esp), %eax
    896 ; AVX512F_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    897 ; AVX512F_32_LIN-NEXT:    addl $20, %esp
    898 ; AVX512F_32_LIN-NEXT:    retl
    899 ;
    900 ; SSE3_32_WIN-LABEL: d_to_s64:
    901 ; SSE3_32_WIN:       # %bb.0:
    902 ; SSE3_32_WIN-NEXT:    pushl %ebp
    903 ; SSE3_32_WIN-NEXT:    movl %esp, %ebp
    904 ; SSE3_32_WIN-NEXT:    andl $-8, %esp
    905 ; SSE3_32_WIN-NEXT:    subl $16, %esp
    906 ; SSE3_32_WIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    907 ; SSE3_32_WIN-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
    908 ; SSE3_32_WIN-NEXT:    fldl {{[0-9]+}}(%esp)
    909 ; SSE3_32_WIN-NEXT:    fisttpll (%esp)
    910 ; SSE3_32_WIN-NEXT:    movl (%esp), %eax
    911 ; SSE3_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    912 ; SSE3_32_WIN-NEXT:    movl %ebp, %esp
    913 ; SSE3_32_WIN-NEXT:    popl %ebp
    914 ; SSE3_32_WIN-NEXT:    retl
    915 ;
    916 ; SSE3_32_LIN-LABEL: d_to_s64:
    917 ; SSE3_32_LIN:       # %bb.0:
    918 ; SSE3_32_LIN-NEXT:    subl $20, %esp
    919 ; SSE3_32_LIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    920 ; SSE3_32_LIN-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
    921 ; SSE3_32_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
    922 ; SSE3_32_LIN-NEXT:    fisttpll (%esp)
    923 ; SSE3_32_LIN-NEXT:    movl (%esp), %eax
    924 ; SSE3_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    925 ; SSE3_32_LIN-NEXT:    addl $20, %esp
    926 ; SSE3_32_LIN-NEXT:    retl
    927 ;
    928 ; SSE3_64-LABEL: d_to_s64:
    929 ; SSE3_64:       # %bb.0:
    930 ; SSE3_64-NEXT:    cvttsd2si %xmm0, %rax
    931 ; SSE3_64-NEXT:    retq
    932 ;
    933 ; SSE2_32_WIN-LABEL: d_to_s64:
    934 ; SSE2_32_WIN:       # %bb.0:
    935 ; SSE2_32_WIN-NEXT:    pushl %ebp
    936 ; SSE2_32_WIN-NEXT:    movl %esp, %ebp
    937 ; SSE2_32_WIN-NEXT:    andl $-8, %esp
    938 ; SSE2_32_WIN-NEXT:    subl $24, %esp
    939 ; SSE2_32_WIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    940 ; SSE2_32_WIN-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
    941 ; SSE2_32_WIN-NEXT:    fldl {{[0-9]+}}(%esp)
    942 ; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    943 ; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    944 ; SSE2_32_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    945 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    946 ; SSE2_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    947 ; SSE2_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    948 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    949 ; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    950 ; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    951 ; SSE2_32_WIN-NEXT:    movl %ebp, %esp
    952 ; SSE2_32_WIN-NEXT:    popl %ebp
    953 ; SSE2_32_WIN-NEXT:    retl
    954 ;
    955 ; SSE2_32_LIN-LABEL: d_to_s64:
    956 ; SSE2_32_LIN:       # %bb.0:
    957 ; SSE2_32_LIN-NEXT:    subl $28, %esp
    958 ; SSE2_32_LIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    959 ; SSE2_32_LIN-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
    960 ; SSE2_32_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
    961 ; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    962 ; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    963 ; SSE2_32_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    964 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    965 ; SSE2_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    966 ; SSE2_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    967 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    968 ; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    969 ; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    970 ; SSE2_32_LIN-NEXT:    addl $28, %esp
    971 ; SSE2_32_LIN-NEXT:    retl
    972 ;
    973 ; SSE2_64-LABEL: d_to_s64:
    974 ; SSE2_64:       # %bb.0:
    975 ; SSE2_64-NEXT:    cvttsd2si %xmm0, %rax
    976 ; SSE2_64-NEXT:    retq
    977 ;
    978 ; X87_WIN-LABEL: d_to_s64:
    979 ; X87_WIN:       # %bb.0:
    980 ; X87_WIN-NEXT:    pushl %ebp
    981 ; X87_WIN-NEXT:    movl %esp, %ebp
    982 ; X87_WIN-NEXT:    andl $-8, %esp
    983 ; X87_WIN-NEXT:    subl $16, %esp
    984 ; X87_WIN-NEXT:    fldl 8(%ebp)
    985 ; X87_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
    986 ; X87_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    987 ; X87_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
    988 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    989 ; X87_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
    990 ; X87_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
    991 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
    992 ; X87_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
    993 ; X87_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
    994 ; X87_WIN-NEXT:    movl %ebp, %esp
    995 ; X87_WIN-NEXT:    popl %ebp
    996 ; X87_WIN-NEXT:    retl
    997 ;
    998 ; X87_LIN-LABEL: d_to_s64:
    999 ; X87_LIN:       # %bb.0:
   1000 ; X87_LIN-NEXT:    subl $20, %esp
   1001 ; X87_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
   1002 ; X87_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
   1003 ; X87_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
   1004 ; X87_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
   1005 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1006 ; X87_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
   1007 ; X87_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
   1008 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1009 ; X87_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1010 ; X87_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
   1011 ; X87_LIN-NEXT:    addl $20, %esp
   1012 ; X87_LIN-NEXT:    retl
   1013   %r = fptosi double %a to i64 ; operation under test; 64-bit runs expect one (v)cvttsd2si, 32-bit runs expect vcvttpd2qq with DQ, fisttpll with SSE3 or plain AVX512F, otherwise fistpll bracketed by fnstcw/fldcw (control word 0xC7F, presumably to force truncation)
   1014   ret i64 %r
   1015 }
   1016 
   1017 define i64 @x_to_u64(x86_fp80 %a) nounwind {
   1018 ; AVX512_32_WIN-LABEL: x_to_u64:
   1019 ; AVX512_32_WIN:       # %bb.0:
   1020 ; AVX512_32_WIN-NEXT:    pushl %ebp
   1021 ; AVX512_32_WIN-NEXT:    movl %esp, %ebp
   1022 ; AVX512_32_WIN-NEXT:    andl $-8, %esp
   1023 ; AVX512_32_WIN-NEXT:    subl $8, %esp
   1024 ; AVX512_32_WIN-NEXT:    fldt 8(%ebp)
   1025 ; AVX512_32_WIN-NEXT:    flds __real@5f000000
   1026 ; AVX512_32_WIN-NEXT:    fld %st(1)
   1027 ; AVX512_32_WIN-NEXT:    fsub %st(1)
   1028 ; AVX512_32_WIN-NEXT:    xorl %edx, %edx
   1029 ; AVX512_32_WIN-NEXT:    fxch %st(1)
   1030 ; AVX512_32_WIN-NEXT:    fucompi %st(2)
   1031 ; AVX512_32_WIN-NEXT:    fcmovnbe %st(1), %st(0)
   1032 ; AVX512_32_WIN-NEXT:    fstp %st(1)
   1033 ; AVX512_32_WIN-NEXT:    fisttpll (%esp)
   1034 ; AVX512_32_WIN-NEXT:    setbe %dl
   1035 ; AVX512_32_WIN-NEXT:    shll $31, %edx
   1036 ; AVX512_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
   1037 ; AVX512_32_WIN-NEXT:    movl (%esp), %eax
   1038 ; AVX512_32_WIN-NEXT:    movl %ebp, %esp
   1039 ; AVX512_32_WIN-NEXT:    popl %ebp
   1040 ; AVX512_32_WIN-NEXT:    retl
   1041 ;
   1042 ; AVX512_32_LIN-LABEL: x_to_u64:
   1043 ; AVX512_32_LIN:       # %bb.0:
   1044 ; AVX512_32_LIN-NEXT:    subl $12, %esp
   1045 ; AVX512_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
   1046 ; AVX512_32_LIN-NEXT:    flds {{\.LCPI.*}}
   1047 ; AVX512_32_LIN-NEXT:    fld %st(1)
   1048 ; AVX512_32_LIN-NEXT:    fsub %st(1)
   1049 ; AVX512_32_LIN-NEXT:    xorl %edx, %edx
   1050 ; AVX512_32_LIN-NEXT:    fxch %st(1)
   1051 ; AVX512_32_LIN-NEXT:    fucompi %st(2)
   1052 ; AVX512_32_LIN-NEXT:    fcmovnbe %st(1), %st(0)
   1053 ; AVX512_32_LIN-NEXT:    fstp %st(1)
   1054 ; AVX512_32_LIN-NEXT:    fisttpll (%esp)
   1055 ; AVX512_32_LIN-NEXT:    setbe %dl
   1056 ; AVX512_32_LIN-NEXT:    shll $31, %edx
   1057 ; AVX512_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
   1058 ; AVX512_32_LIN-NEXT:    movl (%esp), %eax
   1059 ; AVX512_32_LIN-NEXT:    addl $12, %esp
   1060 ; AVX512_32_LIN-NEXT:    retl
   1061 ;
   1062 ; AVX512_64_WIN-LABEL: x_to_u64:
   1063 ; AVX512_64_WIN:       # %bb.0:
   1064 ; AVX512_64_WIN-NEXT:    pushq %rax
   1065 ; AVX512_64_WIN-NEXT:    fldt (%rcx)
   1066 ; AVX512_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
   1067 ; AVX512_64_WIN-NEXT:    fld %st(1)
   1068 ; AVX512_64_WIN-NEXT:    fsub %st(1)
   1069 ; AVX512_64_WIN-NEXT:    xorl %ecx, %ecx
   1070 ; AVX512_64_WIN-NEXT:    fxch %st(1)
   1071 ; AVX512_64_WIN-NEXT:    fucompi %st(2)
   1072 ; AVX512_64_WIN-NEXT:    fcmovnbe %st(1), %st(0)
   1073 ; AVX512_64_WIN-NEXT:    fstp %st(1)
   1074 ; AVX512_64_WIN-NEXT:    fisttpll (%rsp)
   1075 ; AVX512_64_WIN-NEXT:    setbe %cl
   1076 ; AVX512_64_WIN-NEXT:    shll $31, %ecx
   1077 ; AVX512_64_WIN-NEXT:    xorl {{[0-9]+}}(%rsp), %ecx
   1078 ; AVX512_64_WIN-NEXT:    shlq $32, %rcx
   1079 ; AVX512_64_WIN-NEXT:    movl (%rsp), %eax
   1080 ; AVX512_64_WIN-NEXT:    orq %rcx, %rax
   1081 ; AVX512_64_WIN-NEXT:    popq %rcx
   1082 ; AVX512_64_WIN-NEXT:    retq
   1083 ;
   1084 ; AVX512_64_LIN-LABEL: x_to_u64:
   1085 ; AVX512_64_LIN:       # %bb.0:
   1086 ; AVX512_64_LIN-NEXT:    fldt {{[0-9]+}}(%rsp)
   1087 ; AVX512_64_LIN-NEXT:    flds {{.*}}(%rip)
   1088 ; AVX512_64_LIN-NEXT:    fld %st(1)
   1089 ; AVX512_64_LIN-NEXT:    fsub %st(1)
   1090 ; AVX512_64_LIN-NEXT:    xorl %ecx, %ecx
   1091 ; AVX512_64_LIN-NEXT:    fxch %st(1)
   1092 ; AVX512_64_LIN-NEXT:    fucompi %st(2)
   1093 ; AVX512_64_LIN-NEXT:    fcmovnbe %st(1), %st(0)
   1094 ; AVX512_64_LIN-NEXT:    fstp %st(1)
   1095 ; AVX512_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
   1096 ; AVX512_64_LIN-NEXT:    setbe %cl
   1097 ; AVX512_64_LIN-NEXT:    shll $31, %ecx
   1098 ; AVX512_64_LIN-NEXT:    xorl -{{[0-9]+}}(%rsp), %ecx
   1099 ; AVX512_64_LIN-NEXT:    shlq $32, %rcx
   1100 ; AVX512_64_LIN-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
   1101 ; AVX512_64_LIN-NEXT:    orq %rcx, %rax
   1102 ; AVX512_64_LIN-NEXT:    retq
   1103 ;
   1104 ; SSE3_32_WIN-LABEL: x_to_u64:
   1105 ; SSE3_32_WIN:       # %bb.0:
   1106 ; SSE3_32_WIN-NEXT:    pushl %ebp
   1107 ; SSE3_32_WIN-NEXT:    movl %esp, %ebp
   1108 ; SSE3_32_WIN-NEXT:    andl $-8, %esp
   1109 ; SSE3_32_WIN-NEXT:    subl $8, %esp
   1110 ; SSE3_32_WIN-NEXT:    fldt 8(%ebp)
   1111 ; SSE3_32_WIN-NEXT:    flds __real@5f000000
   1112 ; SSE3_32_WIN-NEXT:    fld %st(1)
   1113 ; SSE3_32_WIN-NEXT:    fsub %st(1)
   1114 ; SSE3_32_WIN-NEXT:    xorl %edx, %edx
   1115 ; SSE3_32_WIN-NEXT:    fxch %st(1)
   1116 ; SSE3_32_WIN-NEXT:    fucompi %st(2)
   1117 ; SSE3_32_WIN-NEXT:    fcmovnbe %st(1), %st(0)
   1118 ; SSE3_32_WIN-NEXT:    fstp %st(1)
   1119 ; SSE3_32_WIN-NEXT:    fisttpll (%esp)
   1120 ; SSE3_32_WIN-NEXT:    setbe %dl
   1121 ; SSE3_32_WIN-NEXT:    shll $31, %edx
   1122 ; SSE3_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
   1123 ; SSE3_32_WIN-NEXT:    movl (%esp), %eax
   1124 ; SSE3_32_WIN-NEXT:    movl %ebp, %esp
   1125 ; SSE3_32_WIN-NEXT:    popl %ebp
   1126 ; SSE3_32_WIN-NEXT:    retl
   1127 ;
   1128 ; SSE3_32_LIN-LABEL: x_to_u64:
   1129 ; SSE3_32_LIN:       # %bb.0:
   1130 ; SSE3_32_LIN-NEXT:    subl $12, %esp
   1131 ; SSE3_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
   1132 ; SSE3_32_LIN-NEXT:    flds {{\.LCPI.*}}
   1133 ; SSE3_32_LIN-NEXT:    fld %st(1)
   1134 ; SSE3_32_LIN-NEXT:    fsub %st(1)
   1135 ; SSE3_32_LIN-NEXT:    xorl %edx, %edx
   1136 ; SSE3_32_LIN-NEXT:    fxch %st(1)
   1137 ; SSE3_32_LIN-NEXT:    fucompi %st(2)
   1138 ; SSE3_32_LIN-NEXT:    fcmovnbe %st(1), %st(0)
   1139 ; SSE3_32_LIN-NEXT:    fstp %st(1)
   1140 ; SSE3_32_LIN-NEXT:    fisttpll (%esp)
   1141 ; SSE3_32_LIN-NEXT:    setbe %dl
   1142 ; SSE3_32_LIN-NEXT:    shll $31, %edx
   1143 ; SSE3_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
   1144 ; SSE3_32_LIN-NEXT:    movl (%esp), %eax
   1145 ; SSE3_32_LIN-NEXT:    addl $12, %esp
   1146 ; SSE3_32_LIN-NEXT:    retl
   1147 ;
   1148 ; SSE3_64_WIN-LABEL: x_to_u64:
   1149 ; SSE3_64_WIN:       # %bb.0:
   1150 ; SSE3_64_WIN-NEXT:    subq $16, %rsp
   1151 ; SSE3_64_WIN-NEXT:    fldt (%rcx)
   1152 ; SSE3_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
   1153 ; SSE3_64_WIN-NEXT:    fld %st(1)
   1154 ; SSE3_64_WIN-NEXT:    fsub %st(1)
   1155 ; SSE3_64_WIN-NEXT:    fisttpll {{[0-9]+}}(%rsp)
   1156 ; SSE3_64_WIN-NEXT:    fld %st(1)
   1157 ; SSE3_64_WIN-NEXT:    fisttpll (%rsp)
   1158 ; SSE3_64_WIN-NEXT:    fucompi %st(1)
   1159 ; SSE3_64_WIN-NEXT:    fstp %st(0)
   1160 ; SSE3_64_WIN-NEXT:    jbe .LBB4_1
   1161 ; SSE3_64_WIN-NEXT:  # %bb.2:
   1162 ; SSE3_64_WIN-NEXT:    movq (%rsp), %rax
   1163 ; SSE3_64_WIN-NEXT:    addq $16, %rsp
   1164 ; SSE3_64_WIN-NEXT:    retq
   1165 ; SSE3_64_WIN-NEXT:  .LBB4_1:
   1166 ; SSE3_64_WIN-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
   1167 ; SSE3_64_WIN-NEXT:    xorq {{[0-9]+}}(%rsp), %rax
   1168 ; SSE3_64_WIN-NEXT:    addq $16, %rsp
   1169 ; SSE3_64_WIN-NEXT:    retq
   1170 ;
   1171 ; SSE3_64_LIN-LABEL: x_to_u64:
   1172 ; SSE3_64_LIN:       # %bb.0:
   1173 ; SSE3_64_LIN-NEXT:    fldt {{[0-9]+}}(%rsp)
   1174 ; SSE3_64_LIN-NEXT:    flds {{.*}}(%rip)
   1175 ; SSE3_64_LIN-NEXT:    fld %st(1)
   1176 ; SSE3_64_LIN-NEXT:    fsub %st(1)
   1177 ; SSE3_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
   1178 ; SSE3_64_LIN-NEXT:    fld %st(1)
   1179 ; SSE3_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
   1180 ; SSE3_64_LIN-NEXT:    fucompi %st(1)
   1181 ; SSE3_64_LIN-NEXT:    fstp %st(0)
   1182 ; SSE3_64_LIN-NEXT:    jbe .LBB4_1
   1183 ; SSE3_64_LIN-NEXT:  # %bb.2:
   1184 ; SSE3_64_LIN-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
   1185 ; SSE3_64_LIN-NEXT:    retq
   1186 ; SSE3_64_LIN-NEXT:  .LBB4_1:
   1187 ; SSE3_64_LIN-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
   1188 ; SSE3_64_LIN-NEXT:    xorq -{{[0-9]+}}(%rsp), %rax
   1189 ; SSE3_64_LIN-NEXT:    retq
   1190 ;
   1191 ; SSE2_32_WIN-LABEL: x_to_u64:
   1192 ; SSE2_32_WIN:       # %bb.0:
   1193 ; SSE2_32_WIN-NEXT:    pushl %ebp
   1194 ; SSE2_32_WIN-NEXT:    movl %esp, %ebp
   1195 ; SSE2_32_WIN-NEXT:    andl $-8, %esp
   1196 ; SSE2_32_WIN-NEXT:    subl $16, %esp
   1197 ; SSE2_32_WIN-NEXT:    fldt 8(%ebp)
   1198 ; SSE2_32_WIN-NEXT:    flds __real@5f000000
   1199 ; SSE2_32_WIN-NEXT:    fld %st(1)
   1200 ; SSE2_32_WIN-NEXT:    fsub %st(1)
   1201 ; SSE2_32_WIN-NEXT:    xorl %edx, %edx
   1202 ; SSE2_32_WIN-NEXT:    fxch %st(1)
   1203 ; SSE2_32_WIN-NEXT:    fucompi %st(2)
   1204 ; SSE2_32_WIN-NEXT:    fcmovnbe %st(1), %st(0)
   1205 ; SSE2_32_WIN-NEXT:    fstp %st(1)
   1206 ; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
   1207 ; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
   1208 ; SSE2_32_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
   1209 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1210 ; SSE2_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
   1211 ; SSE2_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
   1212 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1213 ; SSE2_32_WIN-NEXT:    setbe %dl
   1214 ; SSE2_32_WIN-NEXT:    shll $31, %edx
   1215 ; SSE2_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
   1216 ; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1217 ; SSE2_32_WIN-NEXT:    movl %ebp, %esp
   1218 ; SSE2_32_WIN-NEXT:    popl %ebp
   1219 ; SSE2_32_WIN-NEXT:    retl
   1220 ;
   1221 ; SSE2_32_LIN-LABEL: x_to_u64:
   1222 ; SSE2_32_LIN:       # %bb.0:
   1223 ; SSE2_32_LIN-NEXT:    subl $20, %esp
   1224 ; SSE2_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
   1225 ; SSE2_32_LIN-NEXT:    flds {{\.LCPI.*}}
   1226 ; SSE2_32_LIN-NEXT:    fld %st(1)
   1227 ; SSE2_32_LIN-NEXT:    fsub %st(1)
   1228 ; SSE2_32_LIN-NEXT:    xorl %edx, %edx
   1229 ; SSE2_32_LIN-NEXT:    fxch %st(1)
   1230 ; SSE2_32_LIN-NEXT:    fucompi %st(2)
   1231 ; SSE2_32_LIN-NEXT:    fcmovnbe %st(1), %st(0)
   1232 ; SSE2_32_LIN-NEXT:    fstp %st(1)
   1233 ; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
   1234 ; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
   1235 ; SSE2_32_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
   1236 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1237 ; SSE2_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
   1238 ; SSE2_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
   1239 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1240 ; SSE2_32_LIN-NEXT:    setbe %dl
   1241 ; SSE2_32_LIN-NEXT:    shll $31, %edx
   1242 ; SSE2_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
   1243 ; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1244 ; SSE2_32_LIN-NEXT:    addl $20, %esp
   1245 ; SSE2_32_LIN-NEXT:    retl
   1246 ;
   1247 ; SSE2_64_WIN-LABEL: x_to_u64:
   1248 ; SSE2_64_WIN:       # %bb.0:
   1249 ; SSE2_64_WIN-NEXT:    subq $24, %rsp
   1250 ; SSE2_64_WIN-NEXT:    fldt (%rcx)
   1251 ; SSE2_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
   1252 ; SSE2_64_WIN-NEXT:    fld %st(1)
   1253 ; SSE2_64_WIN-NEXT:    fsub %st(1)
   1254 ; SSE2_64_WIN-NEXT:    fnstcw {{[0-9]+}}(%rsp)
   1255 ; SSE2_64_WIN-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
   1256 ; SSE2_64_WIN-NEXT:    movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
   1257 ; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
   1258 ; SSE2_64_WIN-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
   1259 ; SSE2_64_WIN-NEXT:    fistpll {{[0-9]+}}(%rsp)
   1260 ; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
   1261 ; SSE2_64_WIN-NEXT:    fnstcw {{[0-9]+}}(%rsp)
   1262 ; SSE2_64_WIN-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
   1263 ; SSE2_64_WIN-NEXT:    movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
   1264 ; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
   1265 ; SSE2_64_WIN-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
   1266 ; SSE2_64_WIN-NEXT:    fld %st(1)
   1267 ; SSE2_64_WIN-NEXT:    fistpll {{[0-9]+}}(%rsp)
   1268 ; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
   1269 ; SSE2_64_WIN-NEXT:    fucompi %st(1)
   1270 ; SSE2_64_WIN-NEXT:    fstp %st(0)
   1271 ; SSE2_64_WIN-NEXT:    jbe .LBB4_1
   1272 ; SSE2_64_WIN-NEXT:  # %bb.2:
   1273 ; SSE2_64_WIN-NEXT:    movq {{[0-9]+}}(%rsp), %rax
   1274 ; SSE2_64_WIN-NEXT:    addq $24, %rsp
   1275 ; SSE2_64_WIN-NEXT:    retq
   1276 ; SSE2_64_WIN-NEXT:  .LBB4_1:
   1277 ; SSE2_64_WIN-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
   1278 ; SSE2_64_WIN-NEXT:    xorq {{[0-9]+}}(%rsp), %rax
   1279 ; SSE2_64_WIN-NEXT:    addq $24, %rsp
   1280 ; SSE2_64_WIN-NEXT:    retq
   1281 ;
   1282 ; SSE2_64_LIN-LABEL: x_to_u64:
   1283 ; SSE2_64_LIN:       # %bb.0:
   1284 ; SSE2_64_LIN-NEXT:    fldt {{[0-9]+}}(%rsp)
   1285 ; SSE2_64_LIN-NEXT:    flds {{.*}}(%rip)
   1286 ; SSE2_64_LIN-NEXT:    fld %st(1)
   1287 ; SSE2_64_LIN-NEXT:    fsub %st(1)
   1288 ; SSE2_64_LIN-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
   1289 ; SSE2_64_LIN-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
   1290 ; SSE2_64_LIN-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
   1291 ; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
   1292 ; SSE2_64_LIN-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
   1293 ; SSE2_64_LIN-NEXT:    fistpll -{{[0-9]+}}(%rsp)
   1294 ; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
   1295 ; SSE2_64_LIN-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
   1296 ; SSE2_64_LIN-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
   1297 ; SSE2_64_LIN-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
   1298 ; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
   1299 ; SSE2_64_LIN-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
   1300 ; SSE2_64_LIN-NEXT:    fld %st(1)
   1301 ; SSE2_64_LIN-NEXT:    fistpll -{{[0-9]+}}(%rsp)
   1302 ; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
   1303 ; SSE2_64_LIN-NEXT:    fucompi %st(1)
   1304 ; SSE2_64_LIN-NEXT:    fstp %st(0)
   1305 ; SSE2_64_LIN-NEXT:    jbe .LBB4_1
   1306 ; SSE2_64_LIN-NEXT:  # %bb.2:
   1307 ; SSE2_64_LIN-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
   1308 ; SSE2_64_LIN-NEXT:    retq
   1309 ; SSE2_64_LIN-NEXT:  .LBB4_1:
   1310 ; SSE2_64_LIN-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
   1311 ; SSE2_64_LIN-NEXT:    xorq -{{[0-9]+}}(%rsp), %rax
   1312 ; SSE2_64_LIN-NEXT:    retq
   1313 ;
   1314 ; X87_WIN-LABEL: x_to_u64:
   1315 ; X87_WIN:       # %bb.0:
   1316 ; X87_WIN-NEXT:    pushl %ebp
   1317 ; X87_WIN-NEXT:    movl %esp, %ebp
   1318 ; X87_WIN-NEXT:    andl $-8, %esp
   1319 ; X87_WIN-NEXT:    subl $16, %esp
   1320 ; X87_WIN-NEXT:    fldt 8(%ebp)
   1321 ; X87_WIN-NEXT:    flds __real@5f000000
   1322 ; X87_WIN-NEXT:    fld %st(1)
   1323 ; X87_WIN-NEXT:    fsub %st(1)
   1324 ; X87_WIN-NEXT:    fxch %st(1)
   1325 ; X87_WIN-NEXT:    fucomp %st(2)
   1326 ; X87_WIN-NEXT:    fnstsw %ax
   1327 ; X87_WIN-NEXT:    # kill: def $ah killed $ah killed $ax
   1328 ; X87_WIN-NEXT:    sahf
   1329 ; X87_WIN-NEXT:    ja LBB4_2
   1330 ; X87_WIN-NEXT:  # %bb.1:
   1331 ; X87_WIN-NEXT:    fstp %st(1)
   1332 ; X87_WIN-NEXT:    fldz
   1333 ; X87_WIN-NEXT:  LBB4_2:
   1334 ; X87_WIN-NEXT:    fstp %st(0)
   1335 ; X87_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
   1336 ; X87_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
   1337 ; X87_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
   1338 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1339 ; X87_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
   1340 ; X87_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
   1341 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1342 ; X87_WIN-NEXT:    setbe %al
   1343 ; X87_WIN-NEXT:    movzbl %al, %edx
   1344 ; X87_WIN-NEXT:    shll $31, %edx
   1345 ; X87_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
   1346 ; X87_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1347 ; X87_WIN-NEXT:    movl %ebp, %esp
   1348 ; X87_WIN-NEXT:    popl %ebp
   1349 ; X87_WIN-NEXT:    retl
   1350 ;
   1351 ; X87_LIN-LABEL: x_to_u64:
   1352 ; X87_LIN:       # %bb.0:
   1353 ; X87_LIN-NEXT:    subl $20, %esp
   1354 ; X87_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
   1355 ; X87_LIN-NEXT:    flds {{\.LCPI.*}}
   1356 ; X87_LIN-NEXT:    fld %st(1)
   1357 ; X87_LIN-NEXT:    fsub %st(1)
   1358 ; X87_LIN-NEXT:    fxch %st(1)
   1359 ; X87_LIN-NEXT:    fucomp %st(2)
   1360 ; X87_LIN-NEXT:    fnstsw %ax
   1361 ; X87_LIN-NEXT:    # kill: def $ah killed $ah killed $ax
   1362 ; X87_LIN-NEXT:    sahf
   1363 ; X87_LIN-NEXT:    ja .LBB4_2
   1364 ; X87_LIN-NEXT:  # %bb.1:
   1365 ; X87_LIN-NEXT:    fstp %st(1)
   1366 ; X87_LIN-NEXT:    fldz
   1367 ; X87_LIN-NEXT:  .LBB4_2:
   1368 ; X87_LIN-NEXT:    fstp %st(0)
   1369 ; X87_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
   1370 ; X87_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
   1371 ; X87_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
   1372 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1373 ; X87_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
   1374 ; X87_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
   1375 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1376 ; X87_LIN-NEXT:    setbe %al
   1377 ; X87_LIN-NEXT:    movzbl %al, %edx
   1378 ; X87_LIN-NEXT:    shll $31, %edx
   1379 ; X87_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %edx
   1380 ; X87_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1381 ; X87_LIN-NEXT:    addl $20, %esp
   1382 ; X87_LIN-NEXT:    retl
   1383   %r = fptoui x86_fp80 %a to i64
   1384   ret i64 %r
   1385 }
   1386 
   ; x_to_s64: converts an x86_fp80 argument to a signed i64 with fptosi.
   ; Lowering recorded by the autogenerated assertions below:
   ;  - AVX512 and SSE3 groups load the value with fldt and store it with a
   ;    single fisttpll, then read the integer back from the stack slot.
   ;  - SSE2 and X87 groups save the x87 control word (fnstcw), install
   ;    0xC7F (fldcw), store with fistpll, and reload the saved control word.
   ; The 32-bit groups read the result into eax (low half) and edx (high
   ; half); the 64-bit groups read it into rax.
   ; The assertion lines are generated by utils/update_llc_test_checks.py;
   ; regenerate them rather than editing by hand.
   1387 define i64 @x_to_s64(x86_fp80 %a) nounwind {
   1388 ; AVX512_32_WIN-LABEL: x_to_s64:
   1389 ; AVX512_32_WIN:       # %bb.0:
   1390 ; AVX512_32_WIN-NEXT:    pushl %ebp
   1391 ; AVX512_32_WIN-NEXT:    movl %esp, %ebp
   1392 ; AVX512_32_WIN-NEXT:    andl $-8, %esp
   1393 ; AVX512_32_WIN-NEXT:    subl $8, %esp
   1394 ; AVX512_32_WIN-NEXT:    fldt 8(%ebp)
   1395 ; AVX512_32_WIN-NEXT:    fisttpll (%esp)
   1396 ; AVX512_32_WIN-NEXT:    movl (%esp), %eax
   1397 ; AVX512_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
   1398 ; AVX512_32_WIN-NEXT:    movl %ebp, %esp
   1399 ; AVX512_32_WIN-NEXT:    popl %ebp
   1400 ; AVX512_32_WIN-NEXT:    retl
   1401 ;
   1402 ; AVX512_32_LIN-LABEL: x_to_s64:
   1403 ; AVX512_32_LIN:       # %bb.0:
   1404 ; AVX512_32_LIN-NEXT:    subl $12, %esp
   1405 ; AVX512_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
   1406 ; AVX512_32_LIN-NEXT:    fisttpll (%esp)
   1407 ; AVX512_32_LIN-NEXT:    movl (%esp), %eax
   1408 ; AVX512_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
   1409 ; AVX512_32_LIN-NEXT:    addl $12, %esp
   1410 ; AVX512_32_LIN-NEXT:    retl
   1411 ;
   1412 ; AVX512_64_WIN-LABEL: x_to_s64:
   1413 ; AVX512_64_WIN:       # %bb.0:
   1414 ; AVX512_64_WIN-NEXT:    pushq %rax
   1415 ; AVX512_64_WIN-NEXT:    fldt (%rcx)
   1416 ; AVX512_64_WIN-NEXT:    fisttpll (%rsp)
   1417 ; AVX512_64_WIN-NEXT:    movq (%rsp), %rax
   1418 ; AVX512_64_WIN-NEXT:    popq %rcx
   1419 ; AVX512_64_WIN-NEXT:    retq
   1420 ;
   1421 ; AVX512_64_LIN-LABEL: x_to_s64:
   1422 ; AVX512_64_LIN:       # %bb.0:
   1423 ; AVX512_64_LIN-NEXT:    fldt {{[0-9]+}}(%rsp)
   1424 ; AVX512_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
   1425 ; AVX512_64_LIN-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
   1426 ; AVX512_64_LIN-NEXT:    retq
   1427 ;
   1428 ; SSE3_32_WIN-LABEL: x_to_s64:
   1429 ; SSE3_32_WIN:       # %bb.0:
   1430 ; SSE3_32_WIN-NEXT:    pushl %ebp
   1431 ; SSE3_32_WIN-NEXT:    movl %esp, %ebp
   1432 ; SSE3_32_WIN-NEXT:    andl $-8, %esp
   1433 ; SSE3_32_WIN-NEXT:    subl $8, %esp
   1434 ; SSE3_32_WIN-NEXT:    fldt 8(%ebp)
   1435 ; SSE3_32_WIN-NEXT:    fisttpll (%esp)
   1436 ; SSE3_32_WIN-NEXT:    movl (%esp), %eax
   1437 ; SSE3_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
   1438 ; SSE3_32_WIN-NEXT:    movl %ebp, %esp
   1439 ; SSE3_32_WIN-NEXT:    popl %ebp
   1440 ; SSE3_32_WIN-NEXT:    retl
   1441 ;
   1442 ; SSE3_32_LIN-LABEL: x_to_s64:
   1443 ; SSE3_32_LIN:       # %bb.0:
   1444 ; SSE3_32_LIN-NEXT:    subl $12, %esp
   1445 ; SSE3_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
   1446 ; SSE3_32_LIN-NEXT:    fisttpll (%esp)
   1447 ; SSE3_32_LIN-NEXT:    movl (%esp), %eax
   1448 ; SSE3_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
   1449 ; SSE3_32_LIN-NEXT:    addl $12, %esp
   1450 ; SSE3_32_LIN-NEXT:    retl
   1451 ;
   1452 ; SSE3_64_WIN-LABEL: x_to_s64:
   1453 ; SSE3_64_WIN:       # %bb.0:
   1454 ; SSE3_64_WIN-NEXT:    pushq %rax
   1455 ; SSE3_64_WIN-NEXT:    fldt (%rcx)
   1456 ; SSE3_64_WIN-NEXT:    fisttpll (%rsp)
   1457 ; SSE3_64_WIN-NEXT:    movq (%rsp), %rax
   1458 ; SSE3_64_WIN-NEXT:    popq %rcx
   1459 ; SSE3_64_WIN-NEXT:    retq
   1460 ;
   1461 ; SSE3_64_LIN-LABEL: x_to_s64:
   1462 ; SSE3_64_LIN:       # %bb.0:
   1463 ; SSE3_64_LIN-NEXT:    fldt {{[0-9]+}}(%rsp)
   1464 ; SSE3_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
   1465 ; SSE3_64_LIN-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
   1466 ; SSE3_64_LIN-NEXT:    retq
   1467 ;
   1468 ; SSE2_32_WIN-LABEL: x_to_s64:
   1469 ; SSE2_32_WIN:       # %bb.0:
   1470 ; SSE2_32_WIN-NEXT:    pushl %ebp
   1471 ; SSE2_32_WIN-NEXT:    movl %esp, %ebp
   1472 ; SSE2_32_WIN-NEXT:    andl $-8, %esp
   1473 ; SSE2_32_WIN-NEXT:    subl $16, %esp
   1474 ; SSE2_32_WIN-NEXT:    fldt 8(%ebp)
   1475 ; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
   1476 ; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
   1477 ; SSE2_32_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
   1478 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1479 ; SSE2_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
   1480 ; SSE2_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
   1481 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1482 ; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1483 ; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
   1484 ; SSE2_32_WIN-NEXT:    movl %ebp, %esp
   1485 ; SSE2_32_WIN-NEXT:    popl %ebp
   1486 ; SSE2_32_WIN-NEXT:    retl
   1487 ;
   1488 ; SSE2_32_LIN-LABEL: x_to_s64:
   1489 ; SSE2_32_LIN:       # %bb.0:
   1490 ; SSE2_32_LIN-NEXT:    subl $20, %esp
   1491 ; SSE2_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
   1492 ; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
   1493 ; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
   1494 ; SSE2_32_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
   1495 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1496 ; SSE2_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
   1497 ; SSE2_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
   1498 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1499 ; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1500 ; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
   1501 ; SSE2_32_LIN-NEXT:    addl $20, %esp
   1502 ; SSE2_32_LIN-NEXT:    retl
   1503 ;
   1504 ; SSE2_64_WIN-LABEL: x_to_s64:
   1505 ; SSE2_64_WIN:       # %bb.0:
   1506 ; SSE2_64_WIN-NEXT:    subq $16, %rsp
   1507 ; SSE2_64_WIN-NEXT:    fldt (%rcx)
   1508 ; SSE2_64_WIN-NEXT:    fnstcw {{[0-9]+}}(%rsp)
   1509 ; SSE2_64_WIN-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
   1510 ; SSE2_64_WIN-NEXT:    movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
   1511 ; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
   1512 ; SSE2_64_WIN-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
   1513 ; SSE2_64_WIN-NEXT:    fistpll {{[0-9]+}}(%rsp)
   1514 ; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
   1515 ; SSE2_64_WIN-NEXT:    movq {{[0-9]+}}(%rsp), %rax
   1516 ; SSE2_64_WIN-NEXT:    addq $16, %rsp
   1517 ; SSE2_64_WIN-NEXT:    retq
   1518 ;
   1519 ; SSE2_64_LIN-LABEL: x_to_s64:
   1520 ; SSE2_64_LIN:       # %bb.0:
   1521 ; SSE2_64_LIN-NEXT:    fldt {{[0-9]+}}(%rsp)
   1522 ; SSE2_64_LIN-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
   1523 ; SSE2_64_LIN-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
   1524 ; SSE2_64_LIN-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
   1525 ; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
   1526 ; SSE2_64_LIN-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
   1527 ; SSE2_64_LIN-NEXT:    fistpll -{{[0-9]+}}(%rsp)
   1528 ; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
   1529 ; SSE2_64_LIN-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
   1530 ; SSE2_64_LIN-NEXT:    retq
   1531 ;
   1532 ; X87_WIN-LABEL: x_to_s64:
   1533 ; X87_WIN:       # %bb.0:
   1534 ; X87_WIN-NEXT:    pushl %ebp
   1535 ; X87_WIN-NEXT:    movl %esp, %ebp
   1536 ; X87_WIN-NEXT:    andl $-8, %esp
   1537 ; X87_WIN-NEXT:    subl $16, %esp
   1538 ; X87_WIN-NEXT:    fldt 8(%ebp)
   1539 ; X87_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
   1540 ; X87_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
   1541 ; X87_WIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
   1542 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1543 ; X87_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
   1544 ; X87_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
   1545 ; X87_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1546 ; X87_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1547 ; X87_WIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
   1548 ; X87_WIN-NEXT:    movl %ebp, %esp
   1549 ; X87_WIN-NEXT:    popl %ebp
   1550 ; X87_WIN-NEXT:    retl
   1551 ;
   1552 ; X87_LIN-LABEL: x_to_s64:
   1553 ; X87_LIN:       # %bb.0:
   1554 ; X87_LIN-NEXT:    subl $20, %esp
   1555 ; X87_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
   1556 ; X87_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
   1557 ; X87_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
   1558 ; X87_LIN-NEXT:    movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
   1559 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1560 ; X87_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
   1561 ; X87_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
   1562 ; X87_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
   1563 ; X87_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1564 ; X87_LIN-NEXT:    movl {{[0-9]+}}(%esp), %edx
   1565 ; X87_LIN-NEXT:    addl $20, %esp
   1566 ; X87_LIN-NEXT:    retl
   1567   %r = fptosi x86_fp80 %a to i64
   1568   ret i64 %r
   1569 }
   1570 
   ; t_to_u64: converts an fp128 argument to an unsigned i64 with fptoui.
   ; Every check group below expects a call to the fixunstfdi runtime helper
   ; instead of inline conversion code. The symbol is spelled __fixunstfdi,
   ; with one extra leading underscore in the 32-bit Windows groups.
   ; Argument forwarding recorded by the assertions:
   ;  - 32-bit AVX512 groups copy the 16-byte value to (%esp) through xmm0.
   ;  - Other 32-bit groups push the value as four 32-bit words.
   ;  - 64-bit groups show only the stack adjustment around the call.
   ; The assertion lines are generated by utils/update_llc_test_checks.py;
   ; regenerate them rather than editing by hand.
   1571 define i64 @t_to_u64(fp128 %a) nounwind {
   1572 ; AVX512_32_WIN-LABEL: t_to_u64:
   1573 ; AVX512_32_WIN:       # %bb.0:
   1574 ; AVX512_32_WIN-NEXT:    subl $16, %esp
   1575 ; AVX512_32_WIN-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
   1576 ; AVX512_32_WIN-NEXT:    vmovups %xmm0, (%esp)
   1577 ; AVX512_32_WIN-NEXT:    calll ___fixunstfdi
   1578 ; AVX512_32_WIN-NEXT:    addl $16, %esp
   1579 ; AVX512_32_WIN-NEXT:    retl
   1580 ;
   1581 ; AVX512_32_LIN-LABEL: t_to_u64:
   1582 ; AVX512_32_LIN:       # %bb.0:
   1583 ; AVX512_32_LIN-NEXT:    subl $28, %esp
   1584 ; AVX512_32_LIN-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
   1585 ; AVX512_32_LIN-NEXT:    vmovups %xmm0, (%esp)
   1586 ; AVX512_32_LIN-NEXT:    calll __fixunstfdi
   1587 ; AVX512_32_LIN-NEXT:    addl $28, %esp
   1588 ; AVX512_32_LIN-NEXT:    retl
   1589 ;
   1590 ; AVX512_64_WIN-LABEL: t_to_u64:
   1591 ; AVX512_64_WIN:       # %bb.0:
   1592 ; AVX512_64_WIN-NEXT:    subq $40, %rsp
   1593 ; AVX512_64_WIN-NEXT:    callq __fixunstfdi
   1594 ; AVX512_64_WIN-NEXT:    addq $40, %rsp
   1595 ; AVX512_64_WIN-NEXT:    retq
   1596 ;
   1597 ; AVX512_64_LIN-LABEL: t_to_u64:
   1598 ; AVX512_64_LIN:       # %bb.0:
   1599 ; AVX512_64_LIN-NEXT:    pushq %rax
   1600 ; AVX512_64_LIN-NEXT:    callq __fixunstfdi
   1601 ; AVX512_64_LIN-NEXT:    popq %rcx
   1602 ; AVX512_64_LIN-NEXT:    retq
   1603 ;
   1604 ; SSE3_32_WIN-LABEL: t_to_u64:
   1605 ; SSE3_32_WIN:       # %bb.0:
   1606 ; SSE3_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1607 ; SSE3_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1608 ; SSE3_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1609 ; SSE3_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1610 ; SSE3_32_WIN-NEXT:    calll ___fixunstfdi
   1611 ; SSE3_32_WIN-NEXT:    addl $16, %esp
   1612 ; SSE3_32_WIN-NEXT:    retl
   1613 ;
   1614 ; SSE3_32_LIN-LABEL: t_to_u64:
   1615 ; SSE3_32_LIN:       # %bb.0:
   1616 ; SSE3_32_LIN-NEXT:    subl $12, %esp
   1617 ; SSE3_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1618 ; SSE3_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1619 ; SSE3_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1620 ; SSE3_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1621 ; SSE3_32_LIN-NEXT:    calll __fixunstfdi
   1622 ; SSE3_32_LIN-NEXT:    addl $28, %esp
   1623 ; SSE3_32_LIN-NEXT:    retl
   1624 ;
   1625 ; SSE3_64_WIN-LABEL: t_to_u64:
   1626 ; SSE3_64_WIN:       # %bb.0:
   1627 ; SSE3_64_WIN-NEXT:    subq $40, %rsp
   1628 ; SSE3_64_WIN-NEXT:    callq __fixunstfdi
   1629 ; SSE3_64_WIN-NEXT:    addq $40, %rsp
   1630 ; SSE3_64_WIN-NEXT:    retq
   1631 ;
   1632 ; SSE3_64_LIN-LABEL: t_to_u64:
   1633 ; SSE3_64_LIN:       # %bb.0:
   1634 ; SSE3_64_LIN-NEXT:    pushq %rax
   1635 ; SSE3_64_LIN-NEXT:    callq __fixunstfdi
   1636 ; SSE3_64_LIN-NEXT:    popq %rcx
   1637 ; SSE3_64_LIN-NEXT:    retq
   1638 ;
   1639 ; SSE2_32_WIN-LABEL: t_to_u64:
   1640 ; SSE2_32_WIN:       # %bb.0:
   1641 ; SSE2_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1642 ; SSE2_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1643 ; SSE2_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1644 ; SSE2_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1645 ; SSE2_32_WIN-NEXT:    calll ___fixunstfdi
   1646 ; SSE2_32_WIN-NEXT:    addl $16, %esp
   1647 ; SSE2_32_WIN-NEXT:    retl
   1648 ;
   1649 ; SSE2_32_LIN-LABEL: t_to_u64:
   1650 ; SSE2_32_LIN:       # %bb.0:
   1651 ; SSE2_32_LIN-NEXT:    subl $12, %esp
   1652 ; SSE2_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1653 ; SSE2_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1654 ; SSE2_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1655 ; SSE2_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1656 ; SSE2_32_LIN-NEXT:    calll __fixunstfdi
   1657 ; SSE2_32_LIN-NEXT:    addl $28, %esp
   1658 ; SSE2_32_LIN-NEXT:    retl
   1659 ;
   1660 ; SSE2_64_WIN-LABEL: t_to_u64:
   1661 ; SSE2_64_WIN:       # %bb.0:
   1662 ; SSE2_64_WIN-NEXT:    subq $40, %rsp
   1663 ; SSE2_64_WIN-NEXT:    callq __fixunstfdi
   1664 ; SSE2_64_WIN-NEXT:    addq $40, %rsp
   1665 ; SSE2_64_WIN-NEXT:    retq
   1666 ;
   1667 ; SSE2_64_LIN-LABEL: t_to_u64:
   1668 ; SSE2_64_LIN:       # %bb.0:
   1669 ; SSE2_64_LIN-NEXT:    pushq %rax
   1670 ; SSE2_64_LIN-NEXT:    callq __fixunstfdi
   1671 ; SSE2_64_LIN-NEXT:    popq %rcx
   1672 ; SSE2_64_LIN-NEXT:    retq
   1673 ;
   1674 ; X87_WIN-LABEL: t_to_u64:
   1675 ; X87_WIN:       # %bb.0:
   1676 ; X87_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1677 ; X87_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1678 ; X87_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1679 ; X87_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1680 ; X87_WIN-NEXT:    calll ___fixunstfdi
   1681 ; X87_WIN-NEXT:    addl $16, %esp
   1682 ; X87_WIN-NEXT:    retl
   1683 ;
   1684 ; X87_LIN-LABEL: t_to_u64:
   1685 ; X87_LIN:       # %bb.0:
   1686 ; X87_LIN-NEXT:    subl $12, %esp
   1687 ; X87_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1688 ; X87_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1689 ; X87_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1690 ; X87_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1691 ; X87_LIN-NEXT:    calll __fixunstfdi
   1692 ; X87_LIN-NEXT:    addl $28, %esp
   1693 ; X87_LIN-NEXT:    retl
   1694   %r = fptoui fp128 %a to i64
   1695   ret i64 %r
   1696 }
   1697 
   ; t_to_s64: converts an fp128 argument to a signed i64 with fptosi.
   ; Every check group below expects a call to the fixtfdi runtime helper
   ; instead of inline conversion code. The symbol is spelled __fixtfdi,
   ; with one extra leading underscore in the 32-bit Windows groups.
   ; Argument forwarding recorded by the assertions:
   ;  - 32-bit AVX512 groups copy the 16-byte value to (%esp) through xmm0.
   ;  - Other 32-bit groups push the value as four 32-bit words.
   ;  - 64-bit groups show only the stack adjustment around the call.
   ; The assertion lines are generated by utils/update_llc_test_checks.py;
   ; regenerate them rather than editing by hand.
   1698 define i64 @t_to_s64(fp128 %a) nounwind {
   1699 ; AVX512_32_WIN-LABEL: t_to_s64:
   1700 ; AVX512_32_WIN:       # %bb.0:
   1701 ; AVX512_32_WIN-NEXT:    subl $16, %esp
   1702 ; AVX512_32_WIN-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
   1703 ; AVX512_32_WIN-NEXT:    vmovups %xmm0, (%esp)
   1704 ; AVX512_32_WIN-NEXT:    calll ___fixtfdi
   1705 ; AVX512_32_WIN-NEXT:    addl $16, %esp
   1706 ; AVX512_32_WIN-NEXT:    retl
   1707 ;
   1708 ; AVX512_32_LIN-LABEL: t_to_s64:
   1709 ; AVX512_32_LIN:       # %bb.0:
   1710 ; AVX512_32_LIN-NEXT:    subl $28, %esp
   1711 ; AVX512_32_LIN-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
   1712 ; AVX512_32_LIN-NEXT:    vmovups %xmm0, (%esp)
   1713 ; AVX512_32_LIN-NEXT:    calll __fixtfdi
   1714 ; AVX512_32_LIN-NEXT:    addl $28, %esp
   1715 ; AVX512_32_LIN-NEXT:    retl
   1716 ;
   1717 ; AVX512_64_WIN-LABEL: t_to_s64:
   1718 ; AVX512_64_WIN:       # %bb.0:
   1719 ; AVX512_64_WIN-NEXT:    subq $40, %rsp
   1720 ; AVX512_64_WIN-NEXT:    callq __fixtfdi
   1721 ; AVX512_64_WIN-NEXT:    addq $40, %rsp
   1722 ; AVX512_64_WIN-NEXT:    retq
   1723 ;
   1724 ; AVX512_64_LIN-LABEL: t_to_s64:
   1725 ; AVX512_64_LIN:       # %bb.0:
   1726 ; AVX512_64_LIN-NEXT:    pushq %rax
   1727 ; AVX512_64_LIN-NEXT:    callq __fixtfdi
   1728 ; AVX512_64_LIN-NEXT:    popq %rcx
   1729 ; AVX512_64_LIN-NEXT:    retq
   1730 ;
   1731 ; SSE3_32_WIN-LABEL: t_to_s64:
   1732 ; SSE3_32_WIN:       # %bb.0:
   1733 ; SSE3_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1734 ; SSE3_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1735 ; SSE3_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1736 ; SSE3_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1737 ; SSE3_32_WIN-NEXT:    calll ___fixtfdi
   1738 ; SSE3_32_WIN-NEXT:    addl $16, %esp
   1739 ; SSE3_32_WIN-NEXT:    retl
   1740 ;
   1741 ; SSE3_32_LIN-LABEL: t_to_s64:
   1742 ; SSE3_32_LIN:       # %bb.0:
   1743 ; SSE3_32_LIN-NEXT:    subl $12, %esp
   1744 ; SSE3_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1745 ; SSE3_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1746 ; SSE3_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1747 ; SSE3_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1748 ; SSE3_32_LIN-NEXT:    calll __fixtfdi
   1749 ; SSE3_32_LIN-NEXT:    addl $28, %esp
   1750 ; SSE3_32_LIN-NEXT:    retl
   1751 ;
   1752 ; SSE3_64_WIN-LABEL: t_to_s64:
   1753 ; SSE3_64_WIN:       # %bb.0:
   1754 ; SSE3_64_WIN-NEXT:    subq $40, %rsp
   1755 ; SSE3_64_WIN-NEXT:    callq __fixtfdi
   1756 ; SSE3_64_WIN-NEXT:    addq $40, %rsp
   1757 ; SSE3_64_WIN-NEXT:    retq
   1758 ;
   1759 ; SSE3_64_LIN-LABEL: t_to_s64:
   1760 ; SSE3_64_LIN:       # %bb.0:
   1761 ; SSE3_64_LIN-NEXT:    pushq %rax
   1762 ; SSE3_64_LIN-NEXT:    callq __fixtfdi
   1763 ; SSE3_64_LIN-NEXT:    popq %rcx
   1764 ; SSE3_64_LIN-NEXT:    retq
   1765 ;
   1766 ; SSE2_32_WIN-LABEL: t_to_s64:
   1767 ; SSE2_32_WIN:       # %bb.0:
   1768 ; SSE2_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1769 ; SSE2_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1770 ; SSE2_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1771 ; SSE2_32_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1772 ; SSE2_32_WIN-NEXT:    calll ___fixtfdi
   1773 ; SSE2_32_WIN-NEXT:    addl $16, %esp
   1774 ; SSE2_32_WIN-NEXT:    retl
   1775 ;
   1776 ; SSE2_32_LIN-LABEL: t_to_s64:
   1777 ; SSE2_32_LIN:       # %bb.0:
   1778 ; SSE2_32_LIN-NEXT:    subl $12, %esp
   1779 ; SSE2_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1780 ; SSE2_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1781 ; SSE2_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1782 ; SSE2_32_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1783 ; SSE2_32_LIN-NEXT:    calll __fixtfdi
   1784 ; SSE2_32_LIN-NEXT:    addl $28, %esp
   1785 ; SSE2_32_LIN-NEXT:    retl
   1786 ;
   1787 ; SSE2_64_WIN-LABEL: t_to_s64:
   1788 ; SSE2_64_WIN:       # %bb.0:
   1789 ; SSE2_64_WIN-NEXT:    subq $40, %rsp
   1790 ; SSE2_64_WIN-NEXT:    callq __fixtfdi
   1791 ; SSE2_64_WIN-NEXT:    addq $40, %rsp
   1792 ; SSE2_64_WIN-NEXT:    retq
   1793 ;
   1794 ; SSE2_64_LIN-LABEL: t_to_s64:
   1795 ; SSE2_64_LIN:       # %bb.0:
   1796 ; SSE2_64_LIN-NEXT:    pushq %rax
   1797 ; SSE2_64_LIN-NEXT:    callq __fixtfdi
   1798 ; SSE2_64_LIN-NEXT:    popq %rcx
   1799 ; SSE2_64_LIN-NEXT:    retq
   1800 ;
   1801 ; X87_WIN-LABEL: t_to_s64:
   1802 ; X87_WIN:       # %bb.0:
   1803 ; X87_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1804 ; X87_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1805 ; X87_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1806 ; X87_WIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1807 ; X87_WIN-NEXT:    calll ___fixtfdi
   1808 ; X87_WIN-NEXT:    addl $16, %esp
   1809 ; X87_WIN-NEXT:    retl
   1810 ;
   1811 ; X87_LIN-LABEL: t_to_s64:
   1812 ; X87_LIN:       # %bb.0:
   1813 ; X87_LIN-NEXT:    subl $12, %esp
   1814 ; X87_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1815 ; X87_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1816 ; X87_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1817 ; X87_LIN-NEXT:    pushl {{[0-9]+}}(%esp)
   1818 ; X87_LIN-NEXT:    calll __fixtfdi
   1819 ; X87_LIN-NEXT:    addl $28, %esp
   1820 ; X87_LIN-NEXT:    retl
   1821   %r = fptosi fp128 %a to i64
   1822   ret i64 %r
   1823 }
   1824