Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C
      5 
      6 define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
        ; Codegen test for fadd on <1 x half>. The autogenerated assertions
        ; below cover the three llc configurations listed in the run lines at
        ; the top of the file (prefixes X86, X64 and F16C).
        ;  - SSE2-only configurations: each operand is passed through the
        ;    __gnu_f2h_ieee and __gnu_h2f_ieee libcalls before a scalar addss.
        ;    In the i686 output the libcall result is stored with fstpt/fstps,
        ;    i.e. it is taken from the x87 stack, which is why one converted
        ;    operand is spilled and reloaded as a 10-byte value.
        ;  - F16C configuration: no libcalls; each operand goes through an
        ;    inline vcvtps2ph / vcvtph2ps pair and the sum is a single vaddss.
        ; NOTE(review): the f2h/h2f round trip presumably rounds each operand
        ; to half precision before the add - confirm against the lowering code.
        ; Do not edit the assertion lines by hand; regenerate them with
        ; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
      7 ; X86-LABEL: ir_fadd_v1f16:
      8 ; X86:       # %bb.0:
      9 ; X86-NEXT:    subl $28, %esp
     10 ; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     11 ; X86-NEXT:    movss %xmm0, (%esp)
     12 ; X86-NEXT:    calll __gnu_f2h_ieee
     13 ; X86-NEXT:    movzwl %ax, %eax
     14 ; X86-NEXT:    movl %eax, (%esp)
     15 ; X86-NEXT:    calll __gnu_h2f_ieee
     16 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
     17 ; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     18 ; X86-NEXT:    movss %xmm0, (%esp)
     19 ; X86-NEXT:    calll __gnu_f2h_ieee
     20 ; X86-NEXT:    movzwl %ax, %eax
     21 ; X86-NEXT:    movl %eax, (%esp)
     22 ; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
     23 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
     24 ; X86-NEXT:    calll __gnu_h2f_ieee
     25 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
     26 ; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     27 ; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
     28 ; X86-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
     29 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
     30 ; X86-NEXT:    addl $28, %esp
     31 ; X86-NEXT:    retl
     32 ;
     33 ; X64-LABEL: ir_fadd_v1f16:
     34 ; X64:       # %bb.0:
     35 ; X64-NEXT:    pushq %rax
     36 ; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
     37 ; X64-NEXT:    movaps %xmm1, %xmm0
     38 ; X64-NEXT:    callq __gnu_f2h_ieee
     39 ; X64-NEXT:    movzwl %ax, %edi
     40 ; X64-NEXT:    callq __gnu_h2f_ieee
     41 ; X64-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
     42 ; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
     43 ; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
     44 ; X64-NEXT:    callq __gnu_f2h_ieee
     45 ; X64-NEXT:    movzwl %ax, %edi
     46 ; X64-NEXT:    callq __gnu_h2f_ieee
     47 ; X64-NEXT:    addss (%rsp), %xmm0 # 4-byte Folded Reload
     48 ; X64-NEXT:    popq %rax
     49 ; X64-NEXT:    retq
     50 ;
     51 ; F16C-LABEL: ir_fadd_v1f16:
     52 ; F16C:       # %bb.0:
     53 ; F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
     54 ; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
     55 ; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
     56 ; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
     57 ; F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
     58 ; F16C-NEXT:    retq
        ; IR under test: one vector fadd of the two arguments, returned directly.
     59   %retval = fadd <1 x half> %arg0, %arg1
     60   ret <1 x half> %retval
     61 }
     62 
     63 define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
        ; Codegen test for fadd on <2 x half>. Same three llc configurations
        ; as the run lines at the top of the file (prefixes X86, X64 and F16C).
        ; The assertions show the vector being handled one element at a time:
        ;  - SSE2-only configurations: four __gnu_f2h_ieee / __gnu_h2f_ieee
        ;    libcall round trips (one per input element) followed by two
        ;    scalar addss instructions. The i686 output keeps three converted
        ;    values as 10-byte x87 spills while the remaining conversions run
        ;    and ends with two flds instructions.
        ;  - F16C configuration: four inline vcvtps2ph / vcvtph2ps pairs, then
        ;    vaddss adds xmm2 into xmm0 and xmm3 into xmm1.
        ; NOTE(review): the f2h/h2f round trip presumably rounds each element
        ; to half precision before the add - confirm against the lowering code.
        ; Do not edit the assertion lines by hand; regenerate them with
        ; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
     64 ; X86-LABEL: ir_fadd_v2f16:
     65 ; X86:       # %bb.0:
     66 ; X86-NEXT:    subl $64, %esp
     67 ; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     68 ; X86-NEXT:    movss %xmm0, (%esp)
     69 ; X86-NEXT:    calll __gnu_f2h_ieee
     70 ; X86-NEXT:    movzwl %ax, %eax
     71 ; X86-NEXT:    movl %eax, (%esp)
     72 ; X86-NEXT:    calll __gnu_h2f_ieee
     73 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
     74 ; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     75 ; X86-NEXT:    movss %xmm0, (%esp)
     76 ; X86-NEXT:    calll __gnu_f2h_ieee
     77 ; X86-NEXT:    movzwl %ax, %eax
     78 ; X86-NEXT:    movl %eax, (%esp)
     79 ; X86-NEXT:    calll __gnu_h2f_ieee
     80 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
     81 ; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     82 ; X86-NEXT:    movss %xmm0, (%esp)
     83 ; X86-NEXT:    calll __gnu_f2h_ieee
     84 ; X86-NEXT:    movzwl %ax, %eax
     85 ; X86-NEXT:    movl %eax, (%esp)
     86 ; X86-NEXT:    calll __gnu_h2f_ieee
     87 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
     88 ; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     89 ; X86-NEXT:    movss %xmm0, (%esp)
     90 ; X86-NEXT:    calll __gnu_f2h_ieee
     91 ; X86-NEXT:    movzwl %ax, %eax
     92 ; X86-NEXT:    movl %eax, (%esp)
     93 ; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
     94 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
     95 ; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
     96 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
     97 ; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
     98 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
     99 ; X86-NEXT:    calll __gnu_h2f_ieee
    100 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
    101 ; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    102 ; X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    103 ; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm1
    104 ; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
    105 ; X86-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
    106 ; X86-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
    107 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    108 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    109 ; X86-NEXT:    addl $64, %esp
    110 ; X86-NEXT:    retl
    111 ;
    112 ; X64-LABEL: ir_fadd_v2f16:
    113 ; X64:       # %bb.0:
    114 ; X64-NEXT:    subq $24, %rsp
    115 ; X64-NEXT:    movss %xmm2, {{[0-9]+}}(%rsp) # 4-byte Spill
    116 ; X64-NEXT:    movss %xmm1, {{[0-9]+}}(%rsp) # 4-byte Spill
    117 ; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
    118 ; X64-NEXT:    movaps %xmm3, %xmm0
    119 ; X64-NEXT:    callq __gnu_f2h_ieee
    120 ; X64-NEXT:    movzwl %ax, %edi
    121 ; X64-NEXT:    callq __gnu_h2f_ieee
    122 ; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
    123 ; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
    124 ; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
    125 ; X64-NEXT:    callq __gnu_f2h_ieee
    126 ; X64-NEXT:    movzwl %ax, %edi
    127 ; X64-NEXT:    callq __gnu_h2f_ieee
    128 ; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
    129 ; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
    130 ; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
    131 ; X64-NEXT:    callq __gnu_f2h_ieee
    132 ; X64-NEXT:    movzwl %ax, %edi
    133 ; X64-NEXT:    callq __gnu_h2f_ieee
    134 ; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
    135 ; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
    136 ; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
    137 ; X64-NEXT:    callq __gnu_f2h_ieee
    138 ; X64-NEXT:    movzwl %ax, %edi
    139 ; X64-NEXT:    callq __gnu_h2f_ieee
    140 ; X64-NEXT:    addss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
    141 ; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
    142 ; X64-NEXT:    # xmm1 = mem[0],zero,zero,zero
    143 ; X64-NEXT:    addss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
    144 ; X64-NEXT:    addq $24, %rsp
    145 ; X64-NEXT:    retq
    146 ;
    147 ; F16C-LABEL: ir_fadd_v2f16:
    148 ; F16C:       # %bb.0:
    149 ; F16C-NEXT:    vcvtps2ph $4, %xmm3, %xmm3
    150 ; F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
    151 ; F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
    152 ; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
    153 ; F16C-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
    154 ; F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
    155 ; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
    156 ; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
    157 ; F16C-NEXT:    vaddss %xmm2, %xmm0, %xmm0
    158 ; F16C-NEXT:    vaddss %xmm3, %xmm1, %xmm1
    159 ; F16C-NEXT:    retq
        ; IR under test: one vector fadd of the two arguments, returned directly.
    160   %retval = fadd <2 x half> %arg0, %arg1
    161   ret <2 x half> %retval
    162 }
    163