; Code generation test for the IR `half` type on x86.
;
; Three configurations are exercised:
;   * x86-64 without F16C (prefixes CHECK and CHECK-LIBCALL): the checks expect
;     half<->float conversions to call __gnu_h2f_ieee / __gnu_f2h_ieee.
;   * x86-64 with F16C (prefixes CHECK and CHECK-F16C): the checks expect
;     vcvtph2ps / vcvtps2ph instead of those calls.
;   * i686 with SSE2 (prefix CHECK-I686 only): used solely by the last test.
;
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false \
; RUN:   | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \
; RUN:   | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2 -asm-verbose=false \
; RUN:   | FileCheck %s -check-prefix=CHECK-I686

; A half load followed by a half store is expected to be a plain 16-bit move.
define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
; CHECK: movw (%rdi), [[TMP:%[a-z0-9]+]]
; CHECK: movw [[TMP]], (%rsi)
  %val = load half, half* %in
  store half %val, half* %out
  ret void
}

; Bitcasting a loaded half to i16 is expected to be a single zero-extending load.
define i16 @test_bitcast_from_half(half* %addr) {
; CHECK-LABEL: test_bitcast_from_half:
; CHECK: movzwl (%rdi), %eax
  %val = load half, half* %addr
  %val_int = bitcast half %val to i16
  ret i16 %val_int
}

; Bitcasting an i16 argument to half and storing it is expected to be a single
; 16-bit store.
define void @test_bitcast_to_half(half* %addr, i16 %in) {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK: movw %si, (%rdi)
  %val_fp = bitcast i16 %in to half
  store half %val_fp, half* %addr
  ret void
}

; half -> float extension: tail call to the helper without F16C, vcvtph2ps with it.
define float @test_extend32(half* %addr) {
; CHECK-LABEL: test_extend32:

; CHECK-LIBCALL: jmp __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to float
  ret float %val32
}

; half -> double extension: the checks expect a half -> float step followed by
; a float -> double convert in both configurations.
define double @test_extend64(half* %addr) {
; CHECK-LABEL: test_extend64:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtss2sd
  %val16 = load half, half* %addr
  %val64 = fpext half %val16 to double
  ret double %val64
}

; float -> half truncation: helper call without F16C, vcvtps2ph with it.
define void @test_trunc32(float %in, half* %addr) {
; CHECK-LABEL: test_trunc32:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
  %val16 = fptrunc float %in to half
  store half %val16, half* %addr
  ret void
}

; double -> half truncation: both configurations are expected to call
; __truncdfhf2.
define void @test_trunc64(double %in, half* %addr) {
; CHECK-LABEL: test_trunc64:

; CHECK-LIBCALL: callq __truncdfhf2
; CHECK-F16C: callq __truncdfhf2
  %val16 = fptrunc double %in to half
  store half %val16, half* %addr
  ret void
}

; half -> i64 (signed): extend to float, then a truncating float -> i64 convert.
define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LABEL: test_fptosi_i64:

; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; i64 (signed) -> half: convert to float, then float -> half, then a 16-bit store.
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_sitofp_i64:

; The store address is kept in [[ADDR]] across the helper call.
; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-F16C-NEXT: retq
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; half -> i64 (unsigned).
define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LABEL: test_fptoui_i64:

; FP_TO_UINT is expanded using FP_TO_SINT
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]]
; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, [[REG4:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]]
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0
; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; Same expansion with AVX-encoded scalar instructions. The xorq destination is
; the register loaded by movabsq, so it is referenced rather than re-captured.
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]]
; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]]
; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]]
; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; i64 (unsigned) -> half: the checks expect a sign test that selects between a
; direct signed convert and a shift+or path, followed by float -> half.
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_uitofp_i64:
; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-NEXT: movl %edi, [[REG0:%[a-z0-9]+]]
; CHECK-NEXT: andl $1, [[REG0]]
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]]

; simple conversion to float if non-negative
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]]
; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]]

; convert using shift+or if negative
; CHECK-NEXT: [[LABEL1]]:
; CHECK-NEXT: shrq %rdi
; CHECK-NEXT: orq %rdi, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]]
; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]]
; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1]]

; convert float to half
; CHECK-NEXT: [[LABEL2]]:
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq

  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; <4 x half> -> <4 x float>: four scalar conversions are expected in either
; configuration.
define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend32_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x float> %b
}

; <4 x half> -> <4 x double>: four half -> float steps and four float -> double
; converts; the -DAG directives allow them to be interleaved in any order.
define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend64_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C: vcvtss2sd
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

; <4 x float> -> <4 x half>: four scalar truncations followed by four 16-bit
; stores.
define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc32_vec4:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; <4 x double> -> <4 x half>: four __truncdfhf2 calls followed by four 16-bit
; stores, in both x86-64 configurations.
define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc64_vec4:
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x double> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

declare float @test_floatret();

; On i686, if SSE2 is available, the return value from test_floatret is loaded
; to f80 and then rounded to f32. The DAG combiner should not combine this
; fp_round and the subsequent fptrunc from float to half.
;
; NOTE(review): the i686 run enables only the CHECK-I686 prefix, so the label
; directive below is active in the two x86-64 runs only, and the negative i686
; directive is not scoped to this function in the i686 run.
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686-NOT: calll __truncxfhf2
  %1 = call float @test_floatret()
  %2 = fptrunc float %1 to half
  ret half %2
}

attributes #0 = { nounwind }