; Code generation tests for the IR `half` type on x86.
;
; Four configurations are exercised by the command lines below:
;   * x86-64 without F16C, byte/word instruction fixup enabled
;   * x86-64 without F16C, byte/word instruction fixup disabled
;   * x86-64 with F16C,    byte/word instruction fixup enabled
;   * i686 with SSE2 (only used by test_f80trunc_nodagcombine)
;
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false -fixup-byte-word-insts=1 \
; RUN:   | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL -check-prefix=BWON
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false -fixup-byte-word-insts=0 \
; RUN:   | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL -check-prefix=BWOFF
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false -fixup-byte-word-insts=1 \
; RUN:   | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C -check-prefix=BWON
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -asm-verbose=false -fixup-byte-word-insts=0 \
; RUN:   | FileCheck %s -check-prefix=CHECK-I686

; A half load/store pair is expected to lower to 16-bit integer moves; the
; form of the load depends on the byte/word fixup setting.
define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
; BWON:  movzwl (%rdi), %eax
; BWOFF: movw (%rdi), %ax
; CHECK: movw %ax, (%rsi)
  %val = load half, half* %in
  store half %val, half* %out
  ret void
}

; Bitcasting a loaded half to i16 needs nothing beyond the load itself.
define i16 @test_bitcast_from_half(half* %addr) {
; CHECK-LABEL: test_bitcast_from_half:
; BWON:  movzwl (%rdi), %eax
; BWOFF: movw (%rdi), %ax
  %val = load half, half* %addr
  %val_int = bitcast half %val to i16
  ret i16 %val_int
}

; Bitcasting an i16 argument to half and storing it is a single 16-bit store.
define void @test_bitcast_to_half(half* %addr, i16 %in) {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK: movw %si, (%rdi)
  %val_fp = bitcast i16 %in to half
  store half %val_fp, half* %addr
  ret void
}

; half -> float extension: a tail call to the runtime helper without F16C,
; a vcvtph2ps with F16C.
define float @test_extend32(half* %addr) {
; CHECK-LABEL: test_extend32:

; CHECK-LIBCALL: jmp __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to float
  ret float %val32
}

; half -> double extension goes through float in both configurations.
define double @test_extend64(half* %addr) {
; CHECK-LABEL: test_extend64:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtss2sd
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to double
  ret double %val32
}

; float -> half truncation: runtime helper without F16C, vcvtps2ph with F16C.
define void @test_trunc32(float %in, half* %addr) {
; CHECK-LABEL: test_trunc32:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
  %val16 = fptrunc float %in to half
  store half %val16, half* %addr
  ret void
}

; double -> half truncation is expected to call __truncdfhf2 in both
; configurations.
define void @test_trunc64(double %in, half* %addr) {
; CHECK-LABEL: test_trunc64:

; CHECK-LIBCALL: callq __truncdfhf2
; CHECK-F16C: callq __truncdfhf2
  %val16 = fptrunc double %in to half
  store half %val16, half* %addr
  ret void
}

; half -> i64 (signed): extend to float, then cvttss2si.
define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LABEL: test_fptosi_i64:

; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; i64 (signed) -> half: convert to float, truncate to half, store 16 bits.
; The libcall sequence is checked through to the epilogue (popq/retq).
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_sitofp_i64:

; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-F16C-NEXT: retq
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; half -> i64 (unsigned). The xor destination must be the register that
; received the movabsq constant, and that same register feeds the cmovaeq.
define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LABEL: test_fptoui_i64:

; FP_TO_UINT is expanded using FP_TO_SINT
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]]
; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, [[REG4:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]]
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0
; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]]
; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]]
; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]]
; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; i64 (unsigned) -> half. Both branches must leave the float result in the
; same register, which is then truncated to half after the join label.
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_uitofp_i64:
; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-NEXT: movl %edi, [[REG0:%[a-z0-9]+]]
; CHECK-NEXT: andl $1, [[REG0]]
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]]

; simple conversion to float if non-negative
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]]
; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]]

; convert using shift+or if negative
; CHECK-NEXT: [[LABEL1]]:
; CHECK-NEXT: shrq %rdi
; CHECK-NEXT: orq %rdi, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]]
; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]]
; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1]]

; convert float to half
; CHECK-NEXT: [[LABEL2]]:
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq

  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; <4 x half> -> <4 x float>: four scalar conversions are expected.
define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend32_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x float> %b
}

; <4 x half> -> <4 x double>: four half->float conversions and four
; float->double conversions, in any interleaving between the first and last.
define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend64_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C: vcvtss2sd
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

; <4 x float> -> <4 x half>: four scalar truncations followed by four
; 16-bit stores.
define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc32_vec4:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; <4 x double> -> <4 x half>: four __truncdfhf2 calls followed by four
; 16-bit stores, in every x86-64 configuration.
define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc64_vec4:
; CHECK: callq  __truncdfhf2
; CHECK: callq  __truncdfhf2
; CHECK: callq  __truncdfhf2
; CHECK: callq  __truncdfhf2
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x double> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

declare float @test_floatret();

; On i686, if SSE2 is available, the return value from test_floatret is loaded
; to f80 and then rounded to f32.  The DAG combiner should not combine this
; fp_round and the subsequent fptrunc from float to half.
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686-NOT: calll __truncxfhf2
  %1 = call float @test_floatret()
  %2 = fptrunc float %1 to half
  ret half %2
}

; sitofp i32 -> half followed by a half fadd and an extension to float. The
; converted integer is expected to be rounded to half and extended back to
; float before the add.
define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
; CHECK-LABEL: test_sitofp_fadd_i32:

; CHECK-LIBCALL-NEXT: pushq %rbx
; CHECK-LIBCALL-NEXT: subq $16, %rsp
; CHECK-LIBCALL-NEXT: movl %edi, %ebx
; CHECK-LIBCALL-NEXT: movzwl (%rsi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: movss %xmm0, 12(%rsp)
; CHECK-LIBCALL-NEXT: cvtsi2ssl %ebx, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movzwl %ax, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: addss 12(%rsp), %xmm0
; CHECK-LIBCALL-NEXT: addq $16, %rsp
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: movswl (%rsi), %eax
; CHECK-F16C-NEXT: vmovd %eax, %xmm0
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-F16C-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm1
; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; CHECK-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-F16C-NEXT: retq
  %tmp0 = load half, half* %b
  %tmp1 = sitofp i32 %a to half
  %tmp2 = fadd half %tmp0, %tmp1
  %tmp3 = fpext half %tmp2 to float
  ret float %tmp3
}

attributes #0 = { nounwind }