Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
      3 ; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,BWON-NOF16C
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c  -fixup-byte-word-insts=0 \
      5 ; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
      7 ; RUN:    | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
      8 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0  \
      9 ; RUN:    | FileCheck %s -check-prefixes=CHECK-I686
     10 
     11 define void @test_load_store(half* %in, half* %out) #0 {
        ; Loads a half from %in and stores it unchanged to %out.
        ; Every run is expected to move the 16 bits through a general-purpose
        ; register; the x86-64 runs with -fixup-byte-word-insts=1 expect the load
        ; as movzwl, the remaining runs expect movw.
     12 ; BWON-LABEL: test_load_store:
     13 ; BWON:       # %bb.0:
     14 ; BWON-NEXT:    movzwl (%rdi), %eax
     15 ; BWON-NEXT:    movw %ax, (%rsi)
     16 ; BWON-NEXT:    retq
     17 ;
     18 ; BWOFF-LABEL: test_load_store:
     19 ; BWOFF:       # %bb.0:
     20 ; BWOFF-NEXT:    movw (%rdi), %ax
     21 ; BWOFF-NEXT:    movw %ax, (%rsi)
     22 ; BWOFF-NEXT:    retq
     23 ;
     24 ; CHECK-I686-LABEL: test_load_store:
     25 ; CHECK-I686:       # %bb.0:
     26 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
     27 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     28 ; CHECK-I686-NEXT:    movw (%ecx), %cx
     29 ; CHECK-I686-NEXT:    movw %cx, (%eax)
     30 ; CHECK-I686-NEXT:    retl
     31   %val = load half, half* %in
     32   store half %val, half* %out
     33   ret void
     34 }
     35 
     36 define i16 @test_bitcast_from_half(half* %addr) #0 {
        ; Loads a half from %addr and returns its bit pattern as an i16.
        ; The expected code is a single 16-bit load into the return register
        ; followed by the return, with no conversion instruction or call.
     37 ; BWON-LABEL: test_bitcast_from_half:
     38 ; BWON:       # %bb.0:
     39 ; BWON-NEXT:    movzwl (%rdi), %eax
     40 ; BWON-NEXT:    retq
     41 ;
     42 ; BWOFF-LABEL: test_bitcast_from_half:
     43 ; BWOFF:       # %bb.0:
     44 ; BWOFF-NEXT:    movw (%rdi), %ax
     45 ; BWOFF-NEXT:    retq
     46 ;
     47 ; CHECK-I686-LABEL: test_bitcast_from_half:
     48 ; CHECK-I686:       # %bb.0:
     49 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
     50 ; CHECK-I686-NEXT:    movw (%eax), %ax
     51 ; CHECK-I686-NEXT:    retl
     52   %val = load half, half* %addr
     53   %val_int = bitcast half %val to i16
     54   ret i16 %val_int
     55 }
     56 
     57 define void @test_bitcast_to_half(half* %addr, i16 %in) #0 {
        ; Reinterprets the i16 argument %in as a half and stores it to %addr.
        ; All three x86-64 runs share one expectation: a single 16-bit store of
        ; the incoming argument register. The i686 run loads both arguments from
        ; the stack before the same 16-bit store.
     58 ; CHECK-LABEL: test_bitcast_to_half:
     59 ; CHECK:       # %bb.0:
     60 ; CHECK-NEXT:    movw %si, (%rdi)
     61 ; CHECK-NEXT:    retq
     62 ;
     63 ; CHECK-I686-LABEL: test_bitcast_to_half:
     64 ; CHECK-I686:       # %bb.0:
     65 ; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
     66 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     67 ; CHECK-I686-NEXT:    movw %ax, (%ecx)
     68 ; CHECK-I686-NEXT:    retl
     69   %val_fp = bitcast i16 %in to half
     70   store half %val_fp, half* %addr
     71   ret void
     72 }
     73 
     74 define float @test_extend32(half* %addr) #0 {
        ; Loads a half from %addr and extends it to float (fpext).
        ; Without F16C on x86-64 the expected code zero-extends the 16-bit load
        ; into the first argument register and tail-calls __gnu_h2f_ieee.
        ; With F16C the value is moved into xmm0 and converted with vcvtph2ps.
        ; The i686 run passes the zero-extended value on the stack to a regular
        ; call of __gnu_h2f_ieee.
     75 ; CHECK-LIBCALL-LABEL: test_extend32:
     76 ; CHECK-LIBCALL:       # %bb.0:
     77 ; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
     78 ; CHECK-LIBCALL-NEXT:    jmp __gnu_h2f_ieee # TAILCALL
     79 ;
     80 ; BWON-F16C-LABEL: test_extend32:
     81 ; BWON-F16C:       # %bb.0:
     82 ; BWON-F16C-NEXT:    movswl (%rdi), %eax
     83 ; BWON-F16C-NEXT:    vmovd %eax, %xmm0
     84 ; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
     85 ; BWON-F16C-NEXT:    retq
     86 ;
     87 ; CHECK-I686-LABEL: test_extend32:
     88 ; CHECK-I686:       # %bb.0:
     89 ; CHECK-I686-NEXT:    subl $12, %esp
     90 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
     91 ; CHECK-I686-NEXT:    movzwl (%eax), %eax
     92 ; CHECK-I686-NEXT:    movl %eax, (%esp)
     93 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
     94 ; CHECK-I686-NEXT:    addl $12, %esp
     95 ; CHECK-I686-NEXT:    retl
     96   %val16 = load half, half* %addr
     97   %val32 = fpext half %val16 to float
     98   ret float %val32
     99 }
    100 
    101 define double @test_extend64(half* %addr) #0 {
        ; Loads a half from %addr and extends it directly to double (fpext).
        ; Without F16C on x86-64 the expected code calls __gnu_h2f_ieee to obtain
        ; a float and then widens it with cvtss2sd. With F16C the half is
        ; converted with vcvtph2ps and then widened with vcvtss2sd.
        ; The i686 run only calls __gnu_h2f_ieee; no further conversion
        ; instruction is expected before the return.
    102 ; CHECK-LIBCALL-LABEL: test_extend64:
    103 ; CHECK-LIBCALL:       # %bb.0:
    104 ; CHECK-LIBCALL-NEXT:    pushq %rax
    105 ; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
    106 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    107 ; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
    108 ; CHECK-LIBCALL-NEXT:    popq %rax
    109 ; CHECK-LIBCALL-NEXT:    retq
    110 ;
    111 ; BWON-F16C-LABEL: test_extend64:
    112 ; BWON-F16C:       # %bb.0:
    113 ; BWON-F16C-NEXT:    movswl (%rdi), %eax
    114 ; BWON-F16C-NEXT:    vmovd %eax, %xmm0
    115 ; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
    116 ; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
    117 ; BWON-F16C-NEXT:    retq
    118 ;
    119 ; CHECK-I686-LABEL: test_extend64:
    120 ; CHECK-I686:       # %bb.0:
    121 ; CHECK-I686-NEXT:    subl $12, %esp
    122 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
    123 ; CHECK-I686-NEXT:    movzwl (%eax), %eax
    124 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    125 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    126 ; CHECK-I686-NEXT:    addl $12, %esp
    127 ; CHECK-I686-NEXT:    retl
        ; The result is a 64-bit double, so it is named %val64 rather than %val32.
    128   %val16 = load half, half* %addr
    129   %val64 = fpext half %val16 to double
    130   ret double %val64
    131 }
    132 
    133 define void @test_trunc32(float %in, half* %addr) #0 {
        ; Truncates the float argument %in to half and stores it to %addr.
        ; Without F16C on x86-64 the expected code keeps the destination pointer
        ; in rbx across a call to __gnu_f2h_ieee and stores the 16-bit result.
        ; With F16C the conversion is vcvtps2ph with immediate 4, followed by a
        ; move to a general-purpose register and a 16-bit store.
        ; The i686 run passes the float on the stack to __gnu_f2h_ieee and keeps
        ; the destination pointer in esi across the call.
    134 ; CHECK-LIBCALL-LABEL: test_trunc32:
    135 ; CHECK-LIBCALL:       # %bb.0:
    136 ; CHECK-LIBCALL-NEXT:    pushq %rbx
    137 ; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
    138 ; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
    139 ; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
    140 ; CHECK-LIBCALL-NEXT:    popq %rbx
    141 ; CHECK-LIBCALL-NEXT:    retq
    142 ;
    143 ; BWON-F16C-LABEL: test_trunc32:
    144 ; BWON-F16C:       # %bb.0:
    145 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
    146 ; BWON-F16C-NEXT:    vmovd %xmm0, %eax
    147 ; BWON-F16C-NEXT:    movw %ax, (%rdi)
    148 ; BWON-F16C-NEXT:    retq
    149 ;
    150 ; CHECK-I686-LABEL: test_trunc32:
    151 ; CHECK-I686:       # %bb.0:
    152 ; CHECK-I686-NEXT:    pushl %esi
    153 ; CHECK-I686-NEXT:    subl $8, %esp
    154 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
    155 ; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    156 ; CHECK-I686-NEXT:    movss %xmm0, (%esp)
    157 ; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
    158 ; CHECK-I686-NEXT:    movw %ax, (%esi)
    159 ; CHECK-I686-NEXT:    addl $8, %esp
    160 ; CHECK-I686-NEXT:    popl %esi
    161 ; CHECK-I686-NEXT:    retl
    162   %val16 = fptrunc float %in to half
    163   store half %val16, half* %addr
    164   ret void
    165 }
    166 
    167 define void @test_trunc64(double %in, half* %addr) #0 {
        ; Truncates the double argument %in to half and stores it to %addr.
        ; All three x86-64 runs, including the one with F16C enabled, share one
        ; expectation: a call to __truncdfhf2 with the destination pointer kept
        ; in rbx, then a 16-bit store of the result.
        ; The i686 run passes the double on the stack to __truncdfhf2 and keeps
        ; the destination pointer in esi across the call.
    168 ; CHECK-LABEL: test_trunc64:
    169 ; CHECK:       # %bb.0:
    170 ; CHECK-NEXT:    pushq %rbx
    171 ; CHECK-NEXT:    movq %rdi, %rbx
    172 ; CHECK-NEXT:    callq __truncdfhf2
    173 ; CHECK-NEXT:    movw %ax, (%rbx)
    174 ; CHECK-NEXT:    popq %rbx
    175 ; CHECK-NEXT:    retq
    176 ;
    177 ; CHECK-I686-LABEL: test_trunc64:
    178 ; CHECK-I686:       # %bb.0:
    179 ; CHECK-I686-NEXT:    pushl %esi
    180 ; CHECK-I686-NEXT:    subl $8, %esp
    181 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
    182 ; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    183 ; CHECK-I686-NEXT:    movsd %xmm0, (%esp)
    184 ; CHECK-I686-NEXT:    calll __truncdfhf2
    185 ; CHECK-I686-NEXT:    movw %ax, (%esi)
    186 ; CHECK-I686-NEXT:    addl $8, %esp
    187 ; CHECK-I686-NEXT:    popl %esi
    188 ; CHECK-I686-NEXT:    retl
    189   %val16 = fptrunc double %in to half
    190   store half %val16, half* %addr
    191   ret void
    192 }
    193 
    194 define i64 @test_fptosi_i64(half* %p) #0 {
        ; Loads a half from %p and converts it to a signed 64-bit integer.
        ; On x86-64 the half is first turned into a float (via __gnu_h2f_ieee
        ; without F16C, via vcvtph2ps with F16C) and then converted with a
        ; truncating cvttss2si into rax.
        ; The i686 run calls __gnu_h2f_ieee, stores the x87 result to the stack
        ; as a float, and calls __fixsfdi for the integer conversion.
    195 ; CHECK-LIBCALL-LABEL: test_fptosi_i64:
    196 ; CHECK-LIBCALL:       # %bb.0:
    197 ; CHECK-LIBCALL-NEXT:    pushq %rax
    198 ; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
    199 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    200 ; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
    201 ; CHECK-LIBCALL-NEXT:    popq %rcx
    202 ; CHECK-LIBCALL-NEXT:    retq
    203 ;
    204 ; BWON-F16C-LABEL: test_fptosi_i64:
    205 ; BWON-F16C:       # %bb.0:
    206 ; BWON-F16C-NEXT:    movswl (%rdi), %eax
    207 ; BWON-F16C-NEXT:    vmovd %eax, %xmm0
    208 ; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
    209 ; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
    210 ; BWON-F16C-NEXT:    retq
    211 ;
    212 ; CHECK-I686-LABEL: test_fptosi_i64:
    213 ; CHECK-I686:       # %bb.0:
    214 ; CHECK-I686-NEXT:    subl $12, %esp
    215 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
    216 ; CHECK-I686-NEXT:    movzwl (%eax), %eax
    217 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    218 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    219 ; CHECK-I686-NEXT:    fstps (%esp)
    220 ; CHECK-I686-NEXT:    calll __fixsfdi
    221 ; CHECK-I686-NEXT:    addl $12, %esp
    222 ; CHECK-I686-NEXT:    retl
    223   %a = load half, half* %p, align 2
    224   %r = fptosi half %a to i64
    225   ret i64 %r
    226 }
    227 
    228 define void @test_sitofp_i64(i64 %a, half* %p) #0 {
        ; Converts the signed 64-bit integer %a to half and stores it to %p.
        ; On x86-64 the integer is first converted to float with cvtsi2ssq; the
        ; float is then narrowed by a call to __gnu_f2h_ieee (no F16C) or by
        ; vcvtps2ph with immediate 4 (F16C), and the result is stored as 16 bits.
        ; The i686 run copies the 64-bit argument to a stack slot, converts it
        ; with x87 fildll/fstps, and passes the float to __gnu_f2h_ieee.
    229 ; CHECK-LIBCALL-LABEL: test_sitofp_i64:
    230 ; CHECK-LIBCALL:       # %bb.0:
    231 ; CHECK-LIBCALL-NEXT:    pushq %rbx
    232 ; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
    233 ; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
    234 ; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
    235 ; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
    236 ; CHECK-LIBCALL-NEXT:    popq %rbx
    237 ; CHECK-LIBCALL-NEXT:    retq
    238 ;
    239 ; BWON-F16C-LABEL: test_sitofp_i64:
    240 ; BWON-F16C:       # %bb.0:
    241 ; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
    242 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
    243 ; BWON-F16C-NEXT:    vmovd %xmm0, %eax
    244 ; BWON-F16C-NEXT:    movw %ax, (%rsi)
    245 ; BWON-F16C-NEXT:    retq
    246 ;
    247 ; CHECK-I686-LABEL: test_sitofp_i64:
    248 ; CHECK-I686:       # %bb.0:
    249 ; CHECK-I686-NEXT:    pushl %esi
    250 ; CHECK-I686-NEXT:    subl $24, %esp
    251 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
    252 ; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    253 ; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
    254 ; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
    255 ; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
    256 ; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    257 ; CHECK-I686-NEXT:    movss %xmm0, (%esp)
    258 ; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
    259 ; CHECK-I686-NEXT:    movw %ax, (%esi)
    260 ; CHECK-I686-NEXT:    addl $24, %esp
    261 ; CHECK-I686-NEXT:    popl %esi
    262 ; CHECK-I686-NEXT:    retl
    263   %r = sitofp i64 %a to half
    264   store half %r, half* %p
    265   ret void
    266 }
    267 
    268 define i64 @test_fptoui_i64(half* %p) #0 {
        ; Loads a half from %p and converts it to an unsigned 64-bit integer.
        ; On x86-64 the half is first turned into a float (__gnu_h2f_ieee without
        ; F16C, vcvtph2ps with F16C). The expected sequence then computes two
        ; candidates with the signed truncating conversion: one from the float
        ; itself, and one from the float minus a constant loaded from memory,
        ; whose result is xor-ed with 0x8000000000000000. A ucomiss against the
        ; same constant plus cmovae selects between the two.
        ; The i686 run calls __gnu_h2f_ieee and then __fixunssfdi.
    269 ; CHECK-LIBCALL-LABEL: test_fptoui_i64:
    270 ; CHECK-LIBCALL:       # %bb.0:
    271 ; CHECK-LIBCALL-NEXT:    pushq %rax
    272 ; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
    273 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    274 ; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    275 ; CHECK-LIBCALL-NEXT:    movaps %xmm0, %xmm2
    276 ; CHECK-LIBCALL-NEXT:    subss %xmm1, %xmm2
    277 ; CHECK-LIBCALL-NEXT:    cvttss2si %xmm2, %rax
    278 ; CHECK-LIBCALL-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
    279 ; CHECK-LIBCALL-NEXT:    xorq %rax, %rcx
    280 ; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
    281 ; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
    282 ; CHECK-LIBCALL-NEXT:    cmovaeq %rcx, %rax
    283 ; CHECK-LIBCALL-NEXT:    popq %rcx
    284 ; CHECK-LIBCALL-NEXT:    retq
    285 ;
    286 ; BWON-F16C-LABEL: test_fptoui_i64:
    287 ; BWON-F16C:       # %bb.0:
    288 ; BWON-F16C-NEXT:    movswl (%rdi), %eax
    289 ; BWON-F16C-NEXT:    vmovd %eax, %xmm0
    290 ; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
    291 ; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    292 ; BWON-F16C-NEXT:    vsubss %xmm1, %xmm0, %xmm2
    293 ; BWON-F16C-NEXT:    vcvttss2si %xmm2, %rax
    294 ; BWON-F16C-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
    295 ; BWON-F16C-NEXT:    xorq %rax, %rcx
    296 ; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
    297 ; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
    298 ; BWON-F16C-NEXT:    cmovaeq %rcx, %rax
    299 ; BWON-F16C-NEXT:    retq
    300 ;
    301 ; CHECK-I686-LABEL: test_fptoui_i64:
    302 ; CHECK-I686:       # %bb.0:
    303 ; CHECK-I686-NEXT:    subl $12, %esp
    304 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
    305 ; CHECK-I686-NEXT:    movzwl (%eax), %eax
    306 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    307 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    308 ; CHECK-I686-NEXT:    fstps (%esp)
    309 ; CHECK-I686-NEXT:    calll __fixunssfdi
    310 ; CHECK-I686-NEXT:    addl $12, %esp
    311 ; CHECK-I686-NEXT:    retl
    312   %a = load half, half* %p, align 2
    313   %r = fptoui half %a to i64
    314   ret i64 %r
    315 }
    316 
    317 define void @test_uitofp_i64(i64 %a, half* %p) #0 {
        ; Converts the unsigned 64-bit integer %a to half and stores it to %p.
        ; On x86-64 the expected code branches on the sign bit of the input.
        ; When it is clear, a single cvtsi2ssq is used. When it is set, the value
        ; is shifted right by one with its lowest bit or-ed back in, converted
        ; with cvtsi2ssq, and the result is added to itself.
        ; The float is then narrowed by __gnu_f2h_ieee (no F16C) or by vcvtps2ph
        ; with immediate 4 (F16C) and stored as 16 bits.
        ; The i686 run converts with x87 fildll, adds a constant-pool entry
        ; indexed by the setns result, and calls __gnu_f2h_ieee.
    318 ; CHECK-LIBCALL-LABEL: test_uitofp_i64:
    319 ; CHECK-LIBCALL:       # %bb.0:
    320 ; CHECK-LIBCALL-NEXT:    pushq %rbx
    321 ; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
    322 ; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
    323 ; CHECK-LIBCALL-NEXT:    js .LBB10_1
    324 ; CHECK-LIBCALL-NEXT:  # %bb.2:
    325 ; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
    326 ; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
    327 ; CHECK-LIBCALL-NEXT:  .LBB10_1:
    328 ; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
    329 ; CHECK-LIBCALL-NEXT:    shrq %rax
    330 ; CHECK-LIBCALL-NEXT:    andl $1, %edi
    331 ; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
    332 ; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
    333 ; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
    334 ; CHECK-LIBCALL-NEXT:  .LBB10_3:
    335 ; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
    336 ; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
    337 ; CHECK-LIBCALL-NEXT:    popq %rbx
    338 ; CHECK-LIBCALL-NEXT:    retq
    339 ;
    340 ; BWON-F16C-LABEL: test_uitofp_i64:
    341 ; BWON-F16C:       # %bb.0:
    342 ; BWON-F16C-NEXT:    testq %rdi, %rdi
    343 ; BWON-F16C-NEXT:    js .LBB10_1
    344 ; BWON-F16C-NEXT:  # %bb.2:
    345 ; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
    346 ; BWON-F16C-NEXT:    jmp .LBB10_3
    347 ; BWON-F16C-NEXT:  .LBB10_1:
    348 ; BWON-F16C-NEXT:    movq %rdi, %rax
    349 ; BWON-F16C-NEXT:    shrq %rax
    350 ; BWON-F16C-NEXT:    andl $1, %edi
    351 ; BWON-F16C-NEXT:    orq %rax, %rdi
    352 ; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
    353 ; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
    354 ; BWON-F16C-NEXT:  .LBB10_3:
    355 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
    356 ; BWON-F16C-NEXT:    vmovd %xmm0, %eax
    357 ; BWON-F16C-NEXT:    movw %ax, (%rsi)
    358 ; BWON-F16C-NEXT:    retq
    359 ;
    360 ; CHECK-I686-LABEL: test_uitofp_i64:
    361 ; CHECK-I686:       # %bb.0:
    362 ; CHECK-I686-NEXT:    pushl %esi
    363 ; CHECK-I686-NEXT:    subl $24, %esp
    364 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
    365 ; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    366 ; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
    367 ; CHECK-I686-NEXT:    xorl %eax, %eax
    368 ; CHECK-I686-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
    369 ; CHECK-I686-NEXT:    setns %al
    370 ; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
    371 ; CHECK-I686-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
    372 ; CHECK-I686-NEXT:    fstps (%esp)
    373 ; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
    374 ; CHECK-I686-NEXT:    movw %ax, (%esi)
    375 ; CHECK-I686-NEXT:    addl $24, %esp
    376 ; CHECK-I686-NEXT:    popl %esi
    377 ; CHECK-I686-NEXT:    retl
    378   %r = uitofp i64 %a to half
    379   store half %r, half* %p
    380   ret void
    381 }
    382 
    383 define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
        ; Loads a <4 x half> vector from %p and extends it to <4 x float>.
        ; Every run is expected to handle the four elements one at a time,
        ; reading them as 16-bit values at byte offsets 0, 2, 4 and 6.
        ; Without F16C on x86-64 each element goes through a call to
        ; __gnu_h2f_ieee, with intermediate results spilled to the stack and
        ; recombined using unpcklps and movlhps.
        ; With F16C each element is converted by a scalar vcvtph2ps and the
        ; vector is assembled with vinsertps.
        ; The i686 run makes four __gnu_h2f_ieee calls, spills x87 results, and
        ; rebuilds the vector from stack slots with movss, unpcklps and movlhps.
    384 ; CHECK-LIBCALL-LABEL: test_extend32_vec4:
    385 ; CHECK-LIBCALL:       # %bb.0:
    386 ; CHECK-LIBCALL-NEXT:    pushq %rbx
    387 ; CHECK-LIBCALL-NEXT:    subq $48, %rsp
    388 ; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
    389 ; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
    390 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    391 ; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    392 ; CHECK-LIBCALL-NEXT:    movzwl 2(%rbx), %edi
    393 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    394 ; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    395 ; CHECK-LIBCALL-NEXT:    movzwl 4(%rbx), %edi
    396 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    397 ; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    398 ; CHECK-LIBCALL-NEXT:    movzwl 6(%rbx), %edi
    399 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    400 ; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
    401 ; CHECK-LIBCALL-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
    402 ; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
    403 ; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
    404 ; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
    405 ; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    406 ; CHECK-LIBCALL-NEXT:    addq $48, %rsp
    407 ; CHECK-LIBCALL-NEXT:    popq %rbx
    408 ; CHECK-LIBCALL-NEXT:    retq
    409 ;
    410 ; BWON-F16C-LABEL: test_extend32_vec4:
    411 ; BWON-F16C:       # %bb.0:
    412 ; BWON-F16C-NEXT:    movswl 6(%rdi), %eax
    413 ; BWON-F16C-NEXT:    vmovd %eax, %xmm0
    414 ; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
    415 ; BWON-F16C-NEXT:    movswl 4(%rdi), %eax
    416 ; BWON-F16C-NEXT:    vmovd %eax, %xmm1
    417 ; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
    418 ; BWON-F16C-NEXT:    movswl (%rdi), %eax
    419 ; BWON-F16C-NEXT:    vmovd %eax, %xmm2
    420 ; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
    421 ; BWON-F16C-NEXT:    movswl 2(%rdi), %eax
    422 ; BWON-F16C-NEXT:    vmovd %eax, %xmm3
    423 ; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
    424 ; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
    425 ; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
    426 ; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
    427 ; BWON-F16C-NEXT:    retq
    428 ;
    429 ; CHECK-I686-LABEL: test_extend32_vec4:
    430 ; CHECK-I686:       # %bb.0:
    431 ; CHECK-I686-NEXT:    pushl %esi
    432 ; CHECK-I686-NEXT:    subl $56, %esp
    433 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
    434 ; CHECK-I686-NEXT:    movzwl 4(%esi), %eax
    435 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    436 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    437 ; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
    438 ; CHECK-I686-NEXT:    movzwl 2(%esi), %eax
    439 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    440 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    441 ; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
    442 ; CHECK-I686-NEXT:    movzwl (%esi), %eax
    443 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    444 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    445 ; CHECK-I686-NEXT:    movzwl 6(%esi), %eax
    446 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    447 ; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
    448 ; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
    449 ; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
    450 ; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
    451 ; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
    452 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    453 ; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
    454 ; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    455 ; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    456 ; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    457 ; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    458 ; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    459 ; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
    460 ; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    461 ; CHECK-I686-NEXT:    addl $56, %esp
    462 ; CHECK-I686-NEXT:    popl %esi
    463 ; CHECK-I686-NEXT:    retl
    464   %a = load <4 x half>, <4 x half>* %p, align 8
    465   %b = fpext <4 x half> %a to <4 x float>
    466   ret <4 x float> %b
    467 }
    468 
    469 define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
        ; Loads a <4 x half> vector from %p and extends it to <4 x double>.
        ; Every run is expected to handle the four elements one at a time,
        ; reading them as 16-bit values at byte offsets 0, 2, 4 and 6.
        ; Without F16C on x86-64 each element is converted to float by a call to
        ; __gnu_h2f_ieee, widened with cvtss2sd, and the pairs are combined with
        ; movlhps into xmm0 and xmm1.
        ; With F16C each element goes through vcvtph2ps and vcvtss2sd, and the
        ; two halves are joined into ymm0 with vinsertf128.
        ; The i686 run makes four __gnu_h2f_ieee calls, stores the x87 results
        ; to the stack with fstpl, and loads them with movsd and movhpd.
    470 ; CHECK-LIBCALL-LABEL: test_extend64_vec4:
    471 ; CHECK-LIBCALL:       # %bb.0:
    472 ; CHECK-LIBCALL-NEXT:    pushq %rbx
    473 ; CHECK-LIBCALL-NEXT:    subq $16, %rsp
    474 ; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
    475 ; CHECK-LIBCALL-NEXT:    movzwl 4(%rdi), %edi
    476 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    477 ; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    478 ; CHECK-LIBCALL-NEXT:    movzwl 6(%rbx), %edi
    479 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    480 ; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    481 ; CHECK-LIBCALL-NEXT:    movzwl (%rbx), %edi
    482 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    483 ; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    484 ; CHECK-LIBCALL-NEXT:    movzwl 2(%rbx), %edi
    485 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    486 ; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
    487 ; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
    488 ; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
    489 ; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
    490 ; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    491 ; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
    492 ; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
    493 ; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm2
    494 ; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
    495 ; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
    496 ; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm1
    497 ; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
    498 ; CHECK-LIBCALL-NEXT:    addq $16, %rsp
    499 ; CHECK-LIBCALL-NEXT:    popq %rbx
    500 ; CHECK-LIBCALL-NEXT:    retq
    501 ;
    502 ; BWON-F16C-LABEL: test_extend64_vec4:
    503 ; BWON-F16C:       # %bb.0:
    504 ; BWON-F16C-NEXT:    movswl (%rdi), %eax
    505 ; BWON-F16C-NEXT:    vmovd %eax, %xmm0
    506 ; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
    507 ; BWON-F16C-NEXT:    movswl 2(%rdi), %eax
    508 ; BWON-F16C-NEXT:    vmovd %eax, %xmm1
    509 ; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
    510 ; BWON-F16C-NEXT:    movswl 4(%rdi), %eax
    511 ; BWON-F16C-NEXT:    vmovd %eax, %xmm2
    512 ; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
    513 ; BWON-F16C-NEXT:    movswl 6(%rdi), %eax
    514 ; BWON-F16C-NEXT:    vmovd %eax, %xmm3
    515 ; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
    516 ; BWON-F16C-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
    517 ; BWON-F16C-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
    518 ; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
    519 ; BWON-F16C-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
    520 ; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
    521 ; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    522 ; BWON-F16C-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    523 ; BWON-F16C-NEXT:    retq
    524 ;
    525 ; CHECK-I686-LABEL: test_extend64_vec4:
    526 ; CHECK-I686:       # %bb.0:
    527 ; CHECK-I686-NEXT:    pushl %esi
    528 ; CHECK-I686-NEXT:    subl $88, %esp
    529 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
    530 ; CHECK-I686-NEXT:    movzwl 6(%esi), %eax
    531 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    532 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    533 ; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
    534 ; CHECK-I686-NEXT:    movzwl 4(%esi), %eax
    535 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    536 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    537 ; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
    538 ; CHECK-I686-NEXT:    movzwl 2(%esi), %eax
    539 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    540 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    541 ; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
    542 ; CHECK-I686-NEXT:    movzwl (%esi), %eax
    543 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    544 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    545 ; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
    546 ; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
    547 ; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
    548 ; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
    549 ; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
    550 ; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
    551 ; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
    552 ; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    553 ; CHECK-I686-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    554 ; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    555 ; CHECK-I686-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
    556 ; CHECK-I686-NEXT:    addl $88, %esp
    557 ; CHECK-I686-NEXT:    popl %esi
    558 ; CHECK-I686-NEXT:    retl
    559   %a = load <4 x half>, <4 x half>* %p, align 8
    560   %b = fpext <4 x half> %a to <4 x double>
    561   ret <4 x double> %b
    562 }
    563 
    564 define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 {
        ; Truncates the <4 x float> argument %a to <4 x half> and stores it to %p.
        ; Every run is expected to convert the four lanes one at a time and to
        ; write the results as 16-bit stores at byte offsets 0, 2, 4 and 6.
        ; Without F16C on x86-64 each lane is shuffled into position and passed
        ; to __gnu_f2h_ieee; the input vector is spilled and reloaded around the
        ; calls and the first three results are held in r14, r15 and rbp.
        ; The two non-F16C runs differ only in how a result is copied out of
        ; eax: movl with -fixup-byte-word-insts=1, movw with it set to 0.
        ; With F16C each lane is converted by vcvtps2ph with immediate 4 and
        ; moved to a general-purpose register, with no calls or spills.
        ; The i686 run passes each lane on the stack to __gnu_f2h_ieee and holds
        ; the first three results in si, di and bx.
    565 ; BWON-NOF16C-LABEL: test_trunc32_vec4:
    566 ; BWON-NOF16C:       # %bb.0:
    567 ; BWON-NOF16C-NEXT:    pushq %rbp
    568 ; BWON-NOF16C-NEXT:    pushq %r15
    569 ; BWON-NOF16C-NEXT:    pushq %r14
    570 ; BWON-NOF16C-NEXT:    pushq %rbx
    571 ; BWON-NOF16C-NEXT:    subq $24, %rsp
    572 ; BWON-NOF16C-NEXT:    movq %rdi, %rbx
    573 ; BWON-NOF16C-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    574 ; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
    575 ; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
    576 ; BWON-NOF16C-NEXT:    movl %eax, %r14d
    577 ; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    578 ; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
    579 ; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
    580 ; BWON-NOF16C-NEXT:    movl %eax, %r15d
    581 ; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    582 ; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
    583 ; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
    584 ; BWON-NOF16C-NEXT:    movl %eax, %ebp
    585 ; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    586 ; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
    587 ; BWON-NOF16C-NEXT:    movw %ax, (%rbx)
    588 ; BWON-NOF16C-NEXT:    movw %bp, 6(%rbx)
    589 ; BWON-NOF16C-NEXT:    movw %r15w, 4(%rbx)
    590 ; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
    591 ; BWON-NOF16C-NEXT:    addq $24, %rsp
    592 ; BWON-NOF16C-NEXT:    popq %rbx
    593 ; BWON-NOF16C-NEXT:    popq %r14
    594 ; BWON-NOF16C-NEXT:    popq %r15
    595 ; BWON-NOF16C-NEXT:    popq %rbp
    596 ; BWON-NOF16C-NEXT:    retq
    597 ;
    598 ; BWOFF-LABEL: test_trunc32_vec4:
    599 ; BWOFF:       # %bb.0:
    600 ; BWOFF-NEXT:    pushq %rbp
    601 ; BWOFF-NEXT:    pushq %r15
    602 ; BWOFF-NEXT:    pushq %r14
    603 ; BWOFF-NEXT:    pushq %rbx
    604 ; BWOFF-NEXT:    subq $24, %rsp
    605 ; BWOFF-NEXT:    movq %rdi, %rbx
    606 ; BWOFF-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    607 ; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
    608 ; BWOFF-NEXT:    callq __gnu_f2h_ieee
    609 ; BWOFF-NEXT:    movw %ax, %r14w
    610 ; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    611 ; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
    612 ; BWOFF-NEXT:    callq __gnu_f2h_ieee
    613 ; BWOFF-NEXT:    movw %ax, %r15w
    614 ; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    615 ; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
    616 ; BWOFF-NEXT:    callq __gnu_f2h_ieee
    617 ; BWOFF-NEXT:    movw %ax, %bp
    618 ; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    619 ; BWOFF-NEXT:    callq __gnu_f2h_ieee
    620 ; BWOFF-NEXT:    movw %ax, (%rbx)
    621 ; BWOFF-NEXT:    movw %bp, 6(%rbx)
    622 ; BWOFF-NEXT:    movw %r15w, 4(%rbx)
    623 ; BWOFF-NEXT:    movw %r14w, 2(%rbx)
    624 ; BWOFF-NEXT:    addq $24, %rsp
    625 ; BWOFF-NEXT:    popq %rbx
    626 ; BWOFF-NEXT:    popq %r14
    627 ; BWOFF-NEXT:    popq %r15
    628 ; BWOFF-NEXT:    popq %rbp
    629 ; BWOFF-NEXT:    retq
    630 ;
    631 ; BWON-F16C-LABEL: test_trunc32_vec4:
    632 ; BWON-F16C:       # %bb.0:
    633 ; BWON-F16C-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
    634 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
    635 ; BWON-F16C-NEXT:    vmovd %xmm1, %eax
    636 ; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
    637 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
    638 ; BWON-F16C-NEXT:    vmovd %xmm1, %ecx
    639 ; BWON-F16C-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
    640 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
    641 ; BWON-F16C-NEXT:    vmovd %xmm1, %edx
    642 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
    643 ; BWON-F16C-NEXT:    vmovd %xmm0, %esi
    644 ; BWON-F16C-NEXT:    movw %si, (%rdi)
    645 ; BWON-F16C-NEXT:    movw %dx, 6(%rdi)
    646 ; BWON-F16C-NEXT:    movw %cx, 4(%rdi)
    647 ; BWON-F16C-NEXT:    movw %ax, 2(%rdi)
    648 ; BWON-F16C-NEXT:    retq
    649 ;
    650 ; CHECK-I686-LABEL: test_trunc32_vec4:
    651 ; CHECK-I686:       # %bb.0:
    652 ; CHECK-I686-NEXT:    pushl %ebp
    653 ; CHECK-I686-NEXT:    pushl %ebx
    654 ; CHECK-I686-NEXT:    pushl %edi
    655 ; CHECK-I686-NEXT:    pushl %esi
    656 ; CHECK-I686-NEXT:    subl $44, %esp
    657 ; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
    658 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
    659 ; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
    660 ; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
    661 ; CHECK-I686-NEXT:    movss %xmm1, (%esp)
    662 ; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
    663 ; CHECK-I686-NEXT:    movw %ax, %si
    664 ; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
    665 ; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
    666 ; CHECK-I686-NEXT:    movss %xmm0, (%esp)
    667 ; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
    668 ; CHECK-I686-NEXT:    movw %ax, %di
    669 ; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
    670 ; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
    671 ; CHECK-I686-NEXT:    movss %xmm0, (%esp)
    672 ; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
    673 ; CHECK-I686-NEXT:    movw %ax, %bx
    674 ; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
    675 ; CHECK-I686-NEXT:    movss %xmm0, (%esp)
    676 ; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
    677 ; CHECK-I686-NEXT:    movw %ax, (%ebp)
    678 ; CHECK-I686-NEXT:    movw %bx, 6(%ebp)
    679 ; CHECK-I686-NEXT:    movw %di, 4(%ebp)
    680 ; CHECK-I686-NEXT:    movw %si, 2(%ebp)
    681 ; CHECK-I686-NEXT:    addl $44, %esp
    682 ; CHECK-I686-NEXT:    popl %esi
    683 ; CHECK-I686-NEXT:    popl %edi
    684 ; CHECK-I686-NEXT:    popl %ebx
    685 ; CHECK-I686-NEXT:    popl %ebp
    686 ; CHECK-I686-NEXT:    retl
    687   %v = fptrunc <4 x float> %a to <4 x half>
    688   store <4 x half> %v, <4 x half>* %p
    689   ret void
    690 }
    691 
    ; Truncates a <4 x double> argument to <4 x half> and stores the result
    ; through %p.
    ; Every configuration checked below -- including the +f16c run -- expects
    ; one __truncdfhf2 call per element, followed by four 16-bit stores at
    ; offsets 0, 2, 4 and 6 from %p.
    ; The runs with -fixup-byte-word-insts=1 save each call result with a
    ; 32-bit movl, while the runs with -fixup-byte-word-insts=0 (x86-64 and
    ; i686) save it with a 16-bit movw.
    692 define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 {
    693 ; BWON-NOF16C-LABEL: test_trunc64_vec4:
    694 ; BWON-NOF16C:       # %bb.0:
    695 ; BWON-NOF16C-NEXT:    pushq %rbp
    696 ; BWON-NOF16C-NEXT:    pushq %r15
    697 ; BWON-NOF16C-NEXT:    pushq %r14
    698 ; BWON-NOF16C-NEXT:    pushq %rbx
    699 ; BWON-NOF16C-NEXT:    subq $40, %rsp
    700 ; BWON-NOF16C-NEXT:    movq %rdi, %rbx
    701 ; BWON-NOF16C-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
    702 ; BWON-NOF16C-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    703 ; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
    704 ; BWON-NOF16C-NEXT:    callq __truncdfhf2
    705 ; BWON-NOF16C-NEXT:    movl %eax, %r14d
    706 ; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    707 ; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
    708 ; BWON-NOF16C-NEXT:    callq __truncdfhf2
    709 ; BWON-NOF16C-NEXT:    movl %eax, %r15d
    710 ; BWON-NOF16C-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
    711 ; BWON-NOF16C-NEXT:    callq __truncdfhf2
    712 ; BWON-NOF16C-NEXT:    movl %eax, %ebp
    713 ; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    714 ; BWON-NOF16C-NEXT:    callq __truncdfhf2
    715 ; BWON-NOF16C-NEXT:    movw %ax, 4(%rbx)
    716 ; BWON-NOF16C-NEXT:    movw %bp, (%rbx)
    717 ; BWON-NOF16C-NEXT:    movw %r15w, 6(%rbx)
    718 ; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
    719 ; BWON-NOF16C-NEXT:    addq $40, %rsp
    720 ; BWON-NOF16C-NEXT:    popq %rbx
    721 ; BWON-NOF16C-NEXT:    popq %r14
    722 ; BWON-NOF16C-NEXT:    popq %r15
    723 ; BWON-NOF16C-NEXT:    popq %rbp
    724 ; BWON-NOF16C-NEXT:    retq
    725 ;
    726 ; BWOFF-LABEL: test_trunc64_vec4:
    727 ; BWOFF:       # %bb.0:
    728 ; BWOFF-NEXT:    pushq %rbp
    729 ; BWOFF-NEXT:    pushq %r15
    730 ; BWOFF-NEXT:    pushq %r14
    731 ; BWOFF-NEXT:    pushq %rbx
    732 ; BWOFF-NEXT:    subq $40, %rsp
    733 ; BWOFF-NEXT:    movq %rdi, %rbx
    734 ; BWOFF-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
    735 ; BWOFF-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    736 ; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
    737 ; BWOFF-NEXT:    callq __truncdfhf2
    738 ; BWOFF-NEXT:    movw %ax, %r14w
    739 ; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    740 ; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
    741 ; BWOFF-NEXT:    callq __truncdfhf2
    742 ; BWOFF-NEXT:    movw %ax, %r15w
    743 ; BWOFF-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
    744 ; BWOFF-NEXT:    callq __truncdfhf2
    745 ; BWOFF-NEXT:    movw %ax, %bp
    746 ; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    747 ; BWOFF-NEXT:    callq __truncdfhf2
    748 ; BWOFF-NEXT:    movw %ax, 4(%rbx)
    749 ; BWOFF-NEXT:    movw %bp, (%rbx)
    750 ; BWOFF-NEXT:    movw %r15w, 6(%rbx)
    751 ; BWOFF-NEXT:    movw %r14w, 2(%rbx)
    752 ; BWOFF-NEXT:    addq $40, %rsp
    753 ; BWOFF-NEXT:    popq %rbx
    754 ; BWOFF-NEXT:    popq %r14
    755 ; BWOFF-NEXT:    popq %r15
    756 ; BWOFF-NEXT:    popq %rbp
    757 ; BWOFF-NEXT:    retq
    758 ;
    759 ; BWON-F16C-LABEL: test_trunc64_vec4:
    760 ; BWON-F16C:       # %bb.0:
    761 ; BWON-F16C-NEXT:    pushq %rbp
    762 ; BWON-F16C-NEXT:    pushq %r15
    763 ; BWON-F16C-NEXT:    pushq %r14
    764 ; BWON-F16C-NEXT:    pushq %rbx
    765 ; BWON-F16C-NEXT:    subq $88, %rsp
    766 ; BWON-F16C-NEXT:    movq %rdi, %rbx
    767 ; BWON-F16C-NEXT:    vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
    768 ; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
    769 ; BWON-F16C-NEXT:    vzeroupper
    770 ; BWON-F16C-NEXT:    callq __truncdfhf2
    771 ; BWON-F16C-NEXT:    movl %eax, %r14d
    772 ; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
    773 ; BWON-F16C-NEXT:    vextractf128 $1, %ymm0, %xmm0
    774 ; BWON-F16C-NEXT:    vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    775 ; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
    776 ; BWON-F16C-NEXT:    vzeroupper
    777 ; BWON-F16C-NEXT:    callq __truncdfhf2
    778 ; BWON-F16C-NEXT:    movl %eax, %r15d
    779 ; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
    780 ; BWON-F16C-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
    781 ; BWON-F16C-NEXT:    vzeroupper
    782 ; BWON-F16C-NEXT:    callq __truncdfhf2
    783 ; BWON-F16C-NEXT:    movl %eax, %ebp
    784 ; BWON-F16C-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
    785 ; BWON-F16C-NEXT:    callq __truncdfhf2
    786 ; BWON-F16C-NEXT:    movw %ax, 4(%rbx)
    787 ; BWON-F16C-NEXT:    movw %bp, (%rbx)
    788 ; BWON-F16C-NEXT:    movw %r15w, 6(%rbx)
    789 ; BWON-F16C-NEXT:    movw %r14w, 2(%rbx)
    790 ; BWON-F16C-NEXT:    addq $88, %rsp
    791 ; BWON-F16C-NEXT:    popq %rbx
    792 ; BWON-F16C-NEXT:    popq %r14
    793 ; BWON-F16C-NEXT:    popq %r15
    794 ; BWON-F16C-NEXT:    popq %rbp
    795 ; BWON-F16C-NEXT:    retq
    796 ;
    797 ; CHECK-I686-LABEL: test_trunc64_vec4:
    798 ; CHECK-I686:       # %bb.0:
    799 ; CHECK-I686-NEXT:    pushl %ebp
    800 ; CHECK-I686-NEXT:    pushl %ebx
    801 ; CHECK-I686-NEXT:    pushl %edi
    802 ; CHECK-I686-NEXT:    pushl %esi
    803 ; CHECK-I686-NEXT:    subl $60, %esp
    804 ; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
    805 ; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
    806 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
    807 ; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
    808 ; CHECK-I686-NEXT:    calll __truncdfhf2
    809 ; CHECK-I686-NEXT:    movw %ax, %si
    810 ; CHECK-I686-NEXT:    movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
    811 ; CHECK-I686-NEXT:    movhpd %xmm0, (%esp)
    812 ; CHECK-I686-NEXT:    calll __truncdfhf2
    813 ; CHECK-I686-NEXT:    movw %ax, %di
    814 ; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
    815 ; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
    816 ; CHECK-I686-NEXT:    calll __truncdfhf2
    817 ; CHECK-I686-NEXT:    movw %ax, %bx
    818 ; CHECK-I686-NEXT:    movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
    819 ; CHECK-I686-NEXT:    movhpd %xmm0, (%esp)
    820 ; CHECK-I686-NEXT:    calll __truncdfhf2
    821 ; CHECK-I686-NEXT:    movw %ax, 6(%ebp)
    822 ; CHECK-I686-NEXT:    movw %bx, 4(%ebp)
    823 ; CHECK-I686-NEXT:    movw %di, 2(%ebp)
    824 ; CHECK-I686-NEXT:    movw %si, (%ebp)
    825 ; CHECK-I686-NEXT:    addl $60, %esp
    826 ; CHECK-I686-NEXT:    popl %esi
    827 ; CHECK-I686-NEXT:    popl %edi
    828 ; CHECK-I686-NEXT:    popl %ebx
    829 ; CHECK-I686-NEXT:    popl %ebp
    830 ; CHECK-I686-NEXT:    retl
    831   %v = fptrunc <4 x double> %a to <4 x half>
    832   store <4 x half> %v, <4 x half>* %p
    833   ret void
    834 }
    835 
    ; External function returning float (declaration only, no body here);
    ; test_f80trunc_nodagcombine calls it to obtain its float input.
    836 declare float @test_floatret();
    837 
    838 ; On i686, if SSE2 is available, the return value from test_floatret is loaded
    839 ; to f80 and then rounded to f32.  The DAG combiner should not combine this
    840 ; fp_round and the subsequent fptrunc from float to half.
    ; The function truncates the float returned by test_floatret to half and
    ; returns it.  The expected code keeps the two steps separate: on i686 the
    ; call result is first stored as a 32-bit float (fstps) and only then
    ; passed to __gnu_f2h_ieee; the x86-64 runs convert with __gnu_f2h_ieee or
    ; vcvtps2ph.  In every run the float-to-half conversion is followed by the
    ; matching half-to-float conversion (__gnu_h2f_ieee or vcvtph2ps).
    841 define half @test_f80trunc_nodagcombine() #0 {
    842 ; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
    843 ; CHECK-LIBCALL:       # %bb.0:
    844 ; CHECK-LIBCALL-NEXT:    pushq %rax
    845 ; CHECK-LIBCALL-NEXT:    callq test_floatret
    846 ; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
    847 ; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
    848 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    849 ; CHECK-LIBCALL-NEXT:    popq %rax
    850 ; CHECK-LIBCALL-NEXT:    retq
    851 ;
    852 ; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
    853 ; BWON-F16C:       # %bb.0:
    854 ; BWON-F16C-NEXT:    pushq %rax
    855 ; BWON-F16C-NEXT:    callq test_floatret
    856 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
    857 ; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
    858 ; BWON-F16C-NEXT:    popq %rax
    859 ; BWON-F16C-NEXT:    retq
    860 ;
    861 ; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
    862 ; CHECK-I686:       # %bb.0:
    863 ; CHECK-I686-NEXT:    subl $12, %esp
    864 ; CHECK-I686-NEXT:    calll test_floatret
    865 ; CHECK-I686-NEXT:    fstps (%esp)
    866 ; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
    867 ; CHECK-I686-NEXT:    movzwl %ax, %eax
    868 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    869 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    870 ; CHECK-I686-NEXT:    addl $12, %esp
    871 ; CHECK-I686-NEXT:    retl
    872   %1 = call float @test_floatret()
    873   %2 = fptrunc float %1 to half
    874   ret half %2
    875 }
    876 
    877 
    878 
    879 
    ; Loads a half from %b, converts the i32 %a to half with sitofp, adds the
    ; two as half and returns the sum extended to float.
    ; In the expected code the loaded value is extended to float, the integer
    ; is converted to float and then round-tripped through half
    ; (float -> half -> float), and the final add is a single-precision add.
    ; No conversion back to half follows that add in any checked configuration.
    880 define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
    881 ; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
    882 ; CHECK-LIBCALL:       # %bb.0:
    883 ; CHECK-LIBCALL-NEXT:    pushq %rbx
    884 ; CHECK-LIBCALL-NEXT:    subq $16, %rsp
    885 ; CHECK-LIBCALL-NEXT:    movl %edi, %ebx
    886 ; CHECK-LIBCALL-NEXT:    movzwl (%rsi), %edi
    887 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    888 ; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    889 ; CHECK-LIBCALL-NEXT:    cvtsi2ssl %ebx, %xmm0
    890 ; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
    891 ; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
    892 ; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
    893 ; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
    894 ; CHECK-LIBCALL-NEXT:    addq $16, %rsp
    895 ; CHECK-LIBCALL-NEXT:    popq %rbx
    896 ; CHECK-LIBCALL-NEXT:    retq
    897 ;
    898 ; BWON-F16C-LABEL: test_sitofp_fadd_i32:
    899 ; BWON-F16C:       # %bb.0:
    900 ; BWON-F16C-NEXT:    movswl (%rsi), %eax
    901 ; BWON-F16C-NEXT:    vmovd %eax, %xmm0
    902 ; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
    903 ; BWON-F16C-NEXT:    vcvtsi2ssl %edi, %xmm1, %xmm1
    904 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
    905 ; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
    906 ; BWON-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
    907 ; BWON-F16C-NEXT:    retq
    908 ;
    909 ; CHECK-I686-LABEL: test_sitofp_fadd_i32:
    910 ; CHECK-I686:       # %bb.0:
    911 ; CHECK-I686-NEXT:    subl $28, %esp
    912 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
    913 ; CHECK-I686-NEXT:    movzwl (%eax), %eax
    914 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    915 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    916 ; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
    917 ; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    918 ; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
    919 ; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
    920 ; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
    921 ; CHECK-I686-NEXT:    movss %xmm0, (%esp)
    922 ; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
    923 ; CHECK-I686-NEXT:    movzwl %ax, %eax
    924 ; CHECK-I686-NEXT:    movl %eax, (%esp)
    925 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
    926 ; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
    927 ; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
    928 ; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
    929 ; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
    930 ; CHECK-I686-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
    931 ; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
    932 ; CHECK-I686-NEXT:    addl $28, %esp
    933 ; CHECK-I686-NEXT:    retl
    934   %tmp0 = load half, half* %b
    935   %tmp1 = sitofp i32 %a to half
    936   %tmp2 = fadd half %tmp0, %tmp1
    937   %tmp3 = fpext half %tmp2 to float
    938   ret float %tmp3
    939 }
    940 
    ; Attribute group #0 (nounwind), referenced as #0 by the test function
    ; definitions visible above.
    941 attributes #0 = { nounwind }
    942