; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C

; Codegen test for fadd on small vectors of half (<1 x half> and <2 x half>).
; The file is compiled three times by the run lines above: 32-bit with sse2,
; 64-bit with sse2, and 64-bit with f16c. Each configuration has its own set
; of expected-assembly lines below.
;
; The expected-assembly lines are generated by the script named in the NOTE on
; the first line; regenerate them with that script instead of editing by hand.

; Single-element case. In the expected assembly for both sse2 configurations,
; each of the two inputs goes through a call to __gnu_f2h_ieee followed by a
; call to __gnu_h2f_ieee, and the results are combined with one addss. With
; f16c the same round trip appears inline as a vcvtps2ph/vcvtph2ps pair per
; input, followed by one vaddss.
define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
; X86-LABEL: ir_fadd_v1f16:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    retl
;
; X64-LABEL: ir_fadd_v1f16:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT:    movaps %xmm1, %xmm0
; X64-NEXT:    callq __gnu_f2h_ieee
; X64-NEXT:    movzwl %ax, %edi
; X64-NEXT:    callq __gnu_h2f_ieee
; X64-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    callq __gnu_f2h_ieee
; X64-NEXT:    movzwl %ax, %edi
; X64-NEXT:    callq __gnu_h2f_ieee
; X64-NEXT:    addss (%rsp), %xmm0 # 4-byte Folded Reload
; X64-NEXT:    popq %rax
; X64-NEXT:    retq
;
; F16C-LABEL: ir_fadd_v1f16:
; F16C:       # %bb.0:
; F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; F16C-NEXT:    retq
  %retval = fadd <1 x half> %arg0, %arg1
  ret <1 x half> %retval
}

; Two-element case. The expected assembly is element-wise: four
; __gnu_f2h_ieee/__gnu_h2f_ieee round trips (or four vcvtps2ph/vcvtph2ps pairs
; with f16c) followed by two scalar adds. In the 64-bit expectations the four
; incoming values are read from xmm0 through xmm3 and the two sums are left in
; xmm0 and xmm1.
define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
; X86-LABEL: ir_fadd_v2f16:
; X86:       # %bb.0:
; X86-NEXT:    subl $64, %esp
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm1
; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    addl $64, %esp
; X86-NEXT:    retl
;
; X64-LABEL: ir_fadd_v2f16:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    movss %xmm2, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT:    movss %xmm1, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT:    movaps %xmm3, %xmm0
; X64-NEXT:    callq __gnu_f2h_ieee
; X64-NEXT:    movzwl %ax, %edi
; X64-NEXT:    callq __gnu_h2f_ieee
; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    callq __gnu_f2h_ieee
; X64-NEXT:    movzwl %ax, %edi
; X64-NEXT:    callq __gnu_h2f_ieee
; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    callq __gnu_f2h_ieee
; X64-NEXT:    movzwl %ax, %edi
; X64-NEXT:    callq __gnu_h2f_ieee
; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    callq __gnu_f2h_ieee
; X64-NEXT:    movzwl %ax, %edi
; X64-NEXT:    callq __gnu_h2f_ieee
; X64-NEXT:    addss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
; X64-NEXT:    # xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    addss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    retq
;
; F16C-LABEL: ir_fadd_v2f16:
; F16C:       # %bb.0:
; F16C-NEXT:    vcvtps2ph $4, %xmm3, %xmm3
; F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
; F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; F16C-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
; F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-NEXT:    vaddss %xmm2, %xmm0, %xmm0
; F16C-NEXT:    vaddss %xmm3, %xmm1, %xmm1
; F16C-NEXT:    retq
  %retval = fadd <2 x half> %arg0, %arg1
  ret <2 x half> %retval
}