; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,BWON-NOF16C
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2 -fixup-byte-word-insts=0 \
; RUN:   | FileCheck %s -check-prefixes=CHECK-I686

; Codegen tests for the IR 'half' type on x86. The four llc invocations above
; cover x86-64 with F16C disabled (byte/word fixups on and off), x86-64 with
; F16C enabled, and i686 with SSE2. The assertion lines are generated; rerun
; utils/update_llc_test_checks.py instead of editing them by hand.

; Load a half from %in and store it to %out with no conversion.
define void @test_load_store(half* %in, half* %out) #0 {
; BWON-LABEL: test_load_store:
; BWON:       # %bb.0:
; BWON-NEXT:    movzwl (%rdi), %eax
; BWON-NEXT:    movw %ax, (%rsi)
; BWON-NEXT:    retq
;
; BWOFF-LABEL: test_load_store:
; BWOFF:       # %bb.0:
; BWOFF-NEXT:    movw (%rdi), %ax
; BWOFF-NEXT:    movw %ax, (%rsi)
; BWOFF-NEXT:    retq
;
; CHECK-I686-LABEL: test_load_store:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT:    movw (%ecx), %cx
; CHECK-I686-NEXT:    movw %cx, (%eax)
; CHECK-I686-NEXT:    retl
  %val = load half, half* %in
  store half %val, half* %out
  ret void
}

; Load a half and return its bit pattern as an i16.
define i16 @test_bitcast_from_half(half* %addr) #0 {
; BWON-LABEL: test_bitcast_from_half:
; BWON:       # %bb.0:
; BWON-NEXT:    movzwl (%rdi), %eax
; BWON-NEXT:    retq
;
; BWOFF-LABEL: test_bitcast_from_half:
; BWOFF:       # %bb.0:
; BWOFF-NEXT:    movw (%rdi), %ax
; BWOFF-NEXT:    retq
;
; CHECK-I686-LABEL: test_bitcast_from_half:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movw (%eax), %ax
; CHECK-I686-NEXT:    retl
  %val = load half, half* %addr
  %val_int = bitcast half %val to i16
  ret i16 %val_int
}

; Reinterpret an i16 argument as a half and store it.
define void @test_bitcast_to_half(half* %addr, i16 %in) #0 {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw %si, (%rdi)
; CHECK-NEXT:    retq
;
; CHECK-I686-LABEL: test_bitcast_to_half:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT:    movw %ax, (%ecx)
; CHECK-I686-NEXT:    retl
  %val_fp = bitcast i16 %in to half
  store half %val_fp, half* %addr
  ret void
}

; fpext half -> float. The expected lowering is a __gnu_h2f_ieee call without
; F16C and vcvtph2ps with F16C.
define float @test_extend32(half* %addr) #0 {
; CHECK-LIBCALL-LABEL: test_extend32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    jmp __gnu_h2f_ieee # TAILCALL
;
; BWON-F16C-LABEL: test_extend32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to float
  ret float %val32
}

; fpext half -> double. On x86-64 the expected code converts to float first and
; then widens with (v)cvtss2sd.
define double @test_extend64(half* %addr) #0 {
; CHECK-LIBCALL-LABEL: test_extend64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:    popq %rax
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_extend64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %val16 = load half, half* %addr
  %val64 = fpext half %val16 to double
  ret double %val64
}

; fptrunc float -> half, stored to %addr. The expected lowering is a
; __gnu_f2h_ieee call without F16C and vcvtps2ph with F16C.
define void @test_trunc32(float %in, half* %addr) #0 {
; CHECK-LIBCALL-LABEL: test_trunc32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_trunc32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %eax
; BWON-F16C-NEXT:    movw %ax, (%rdi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $8, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $8, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %val16 = fptrunc float %in to half
  store half %val16, half* %addr
  ret void
}

; fptrunc double -> half. Every configuration, F16C included, is expected to
; call __truncdfhf2.
define void @test_trunc64(double %in, half* %addr) #0 {
; CHECK-LABEL: test_trunc64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    callq __truncdfhf2
; CHECK-NEXT:    movw %ax, (%rbx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $8, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movsd %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $8, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %val16 = fptrunc double %in to half
  store half %val16, half* %addr
  ret void
}

; fptosi half -> i64. The expected code extends to float first; i686 then calls
; __fixsfdi.
define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_fptosi_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT:    popq %rcx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_fptosi_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_fptosi_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __fixsfdi
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; sitofp i64 -> half. The expected code converts to float first and then
; truncates to half.
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_sitofp_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %eax
; BWON-F16C-NEXT:    movw %ax, (%rsi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_sitofp_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $24, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $24, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; fptoui half -> i64. The expected code extends to float first; i686 then calls
; __fixunssfdi.
define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_fptoui_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT:    movaps %xmm0, %xmm2
; CHECK-LIBCALL-NEXT:    subss %xmm1, %xmm2
; CHECK-LIBCALL-NEXT:    cvttss2si %xmm2, %rax
; CHECK-LIBCALL-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; CHECK-LIBCALL-NEXT:    xorq %rax, %rcx
; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
; CHECK-LIBCALL-NEXT:    cmovaeq %rcx, %rax
; CHECK-LIBCALL-NEXT:    popq %rcx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_fptoui_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; BWON-F16C-NEXT:    vsubss %xmm1, %xmm0, %xmm2
; BWON-F16C-NEXT:    vcvttss2si %xmm2, %rax
; BWON-F16C-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; BWON-F16C-NEXT:    xorq %rax, %rcx
; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
; BWON-F16C-NEXT:    cmovaeq %rcx, %rax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_fptoui_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __fixunssfdi
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; uitofp i64 -> half. The expected code converts to float first and then
; truncates to half.
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_uitofp_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
; CHECK-LIBCALL-NEXT:    js .LBB10_1
; CHECK-LIBCALL-NEXT:  # %bb.2:
; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
; CHECK-LIBCALL-NEXT:  .LBB10_1:
; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
; CHECK-LIBCALL-NEXT:    shrq %rax
; CHECK-LIBCALL-NEXT:    andl $1, %edi
; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:  .LBB10_3:
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_uitofp_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    testq %rdi, %rdi
; BWON-F16C-NEXT:    js .LBB10_1
; BWON-F16C-NEXT:  # %bb.2:
; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT:    jmp .LBB10_3
; BWON-F16C-NEXT:  .LBB10_1:
; BWON-F16C-NEXT:    movq %rdi, %rax
; BWON-F16C-NEXT:    shrq %rax
; BWON-F16C-NEXT:    andl $1, %edi
; BWON-F16C-NEXT:    orq %rax, %rdi
; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT:  .LBB10_3:
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %eax
; BWON-F16C-NEXT:    movw %ax, (%rsi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_uitofp_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $24, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    xorl %eax, %eax
; CHECK-I686-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    setns %al
; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $24, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; fpext <4 x half> -> <4 x float>. The expected code converts each of the four
; elements separately and then reassembles the vector.
define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-LIBCALL-LABEL: test_extend32_vec4:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    subq $48, %rsp
; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 2(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 4(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 6(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-LIBCALL-NEXT:    addq $48, %rsp
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_extend32_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl 6(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    movswl 4(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm1
; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm2
; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT:    movswl 2(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm3
; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend32_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $56, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movzwl 4(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl 2(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl (%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    movzwl 6(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-I686-NEXT:    addl $56, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x float>
  ret <4 x float> %b
}

; fpext <4 x half> -> <4 x double>. The expected code converts each of the four
; elements separately and then reassembles the vector.
define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
; CHECK-LIBCALL-LABEL: test_extend64_vec4:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    subq $16, %rsp
; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
; CHECK-LIBCALL-NEXT:    movzwl 4(%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 6(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl (%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 2(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm2
; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm1
; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-LIBCALL-NEXT:    addq $16, %rsp
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_extend64_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    movswl 2(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm1
; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT:    movswl 4(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm2
; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT:    movswl 6(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm3
; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
; BWON-F16C-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
; BWON-F16C-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; BWON-F16C-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BWON-F16C-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend64_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $88, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movzwl 6(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl 4(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl 2(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl (%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-I686-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; CHECK-I686-NEXT:    addl $88, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

; fptrunc <4 x float> -> <4 x half>. The expected code truncates each element
; separately and writes the result with four 16-bit stores.
define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 {
; BWON-NOF16C-LABEL: test_trunc32_vec4:
; BWON-NOF16C:       # %bb.0:
; BWON-NOF16C-NEXT:    pushq %rbp
; BWON-NOF16C-NEXT:    pushq %r15
; BWON-NOF16C-NEXT:    pushq %r14
; BWON-NOF16C-NEXT:    pushq %rbx
; BWON-NOF16C-NEXT:    subq $24, %rsp
; BWON-NOF16C-NEXT:    movq %rdi, %rbx
; BWON-NOF16C-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
; BWON-NOF16C-NEXT:    movl %eax, %r14d
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
; BWON-NOF16C-NEXT:    movl %eax, %r15d
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
; BWON-NOF16C-NEXT:    movl %eax, %ebp
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
; BWON-NOF16C-NEXT:    movw %ax, (%rbx)
; BWON-NOF16C-NEXT:    movw %bp, 6(%rbx)
; BWON-NOF16C-NEXT:    movw %r15w, 4(%rbx)
; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
; BWON-NOF16C-NEXT:    addq $24, %rsp
; BWON-NOF16C-NEXT:    popq %rbx
; BWON-NOF16C-NEXT:    popq %r14
; BWON-NOF16C-NEXT:    popq %r15
; BWON-NOF16C-NEXT:    popq %rbp
; BWON-NOF16C-NEXT:    retq
;
; BWOFF-LABEL: test_trunc32_vec4:
; BWOFF:       # %bb.0:
; BWOFF-NEXT:    pushq %rbp
; BWOFF-NEXT:    pushq %r15
; BWOFF-NEXT:    pushq %r14
; BWOFF-NEXT:    pushq %rbx
; BWOFF-NEXT:    subq $24, %rsp
; BWOFF-NEXT:    movq %rdi, %rbx
; BWOFF-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; BWOFF-NEXT:    callq __gnu_f2h_ieee
; BWOFF-NEXT:    movw %ax, %r14w
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT:    callq __gnu_f2h_ieee
; BWOFF-NEXT:    movw %ax, %r15w
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; BWOFF-NEXT:    callq __gnu_f2h_ieee
; BWOFF-NEXT:    movw %ax, %bp
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    callq __gnu_f2h_ieee
; BWOFF-NEXT:    movw %ax, (%rbx)
; BWOFF-NEXT:    movw %bp, 6(%rbx)
; BWOFF-NEXT:    movw %r15w, 4(%rbx)
; BWOFF-NEXT:    movw %r14w, 2(%rbx)
; BWOFF-NEXT:    addq $24, %rsp
; BWOFF-NEXT:    popq %rbx
; BWOFF-NEXT:    popq %r14
; BWOFF-NEXT:    popq %r15
; BWOFF-NEXT:    popq %rbp
; BWOFF-NEXT:    retq
;
; BWON-F16C-LABEL: test_trunc32_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; BWON-F16C-NEXT:    vmovd %xmm1, %eax
; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; BWON-F16C-NEXT:    vmovd %xmm1, %ecx
; BWON-F16C-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; BWON-F16C-NEXT:    vmovd %xmm1, %edx
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %esi
; BWON-F16C-NEXT:    movw %si, (%rdi)
; BWON-F16C-NEXT:    movw %dx, 6(%rdi)
; BWON-F16C-NEXT:    movw %cx, 4(%rdi)
; BWON-F16C-NEXT:    movw %ax, 2(%rdi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc32_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %ebp
; CHECK-I686-NEXT:    pushl %ebx
; CHECK-I686-NEXT:    pushl %edi
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $44, %esp
; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; CHECK-I686-NEXT:    movss %xmm1, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, %si
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, %di
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, %bx
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, (%ebp)
; CHECK-I686-NEXT:    movw %bx, 6(%ebp)
; CHECK-I686-NEXT:    movw %di, 4(%ebp)
; CHECK-I686-NEXT:    movw %si, 2(%ebp)
; CHECK-I686-NEXT:    addl $44, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    popl %edi
; CHECK-I686-NEXT:    popl %ebx
; CHECK-I686-NEXT:    popl %ebp
; CHECK-I686-NEXT:    retl
  %v = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; fptrunc <4 x double> -> <4 x half>. Every configuration, F16C included, is
; expected to make one __truncdfhf2 call per element.
define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 {
; BWON-NOF16C-LABEL: test_trunc64_vec4:
; BWON-NOF16C:       # %bb.0:
; BWON-NOF16C-NEXT:    pushq %rbp
; BWON-NOF16C-NEXT:    pushq %r15
; BWON-NOF16C-NEXT:    pushq %r14
; BWON-NOF16C-NEXT:    pushq %rbx
; BWON-NOF16C-NEXT:    subq $40, %rsp
; BWON-NOF16C-NEXT:    movq %rdi, %rbx
; BWON-NOF16C-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
; BWON-NOF16C-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT:    callq __truncdfhf2
; BWON-NOF16C-NEXT:    movl %eax, %r14d
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT:    callq __truncdfhf2
; BWON-NOF16C-NEXT:    movl %eax, %r15d
; BWON-NOF16C-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    callq __truncdfhf2
; BWON-NOF16C-NEXT:    movl %eax, %ebp
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    callq __truncdfhf2
; BWON-NOF16C-NEXT:    movw %ax, 4(%rbx)
; BWON-NOF16C-NEXT:    movw %bp, (%rbx)
; BWON-NOF16C-NEXT:    movw %r15w, 6(%rbx)
; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
; BWON-NOF16C-NEXT:    addq $40, %rsp
; BWON-NOF16C-NEXT:    popq %rbx
; BWON-NOF16C-NEXT:    popq %r14
; BWON-NOF16C-NEXT:    popq %r15
; BWON-NOF16C-NEXT:    popq %rbp
; BWON-NOF16C-NEXT:    retq
;
; BWOFF-LABEL: test_trunc64_vec4:
; BWOFF:       # %bb.0:
; BWOFF-NEXT:    pushq %rbp
; BWOFF-NEXT:    pushq %r15
; BWOFF-NEXT:    pushq %r14
; BWOFF-NEXT:    pushq %rbx
; BWOFF-NEXT:    subq $40, %rsp
; BWOFF-NEXT:    movq %rdi, %rbx
; BWOFF-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
; BWOFF-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT:    callq __truncdfhf2
; BWOFF-NEXT:    movw %ax, %r14w
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT:    callq __truncdfhf2
; BWOFF-NEXT:    movw %ax, %r15w
; BWOFF-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    callq __truncdfhf2
; BWOFF-NEXT:    movw %ax, %bp
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    callq __truncdfhf2
; BWOFF-NEXT:    movw %ax, 4(%rbx)
; BWOFF-NEXT:    movw %bp, (%rbx)
; BWOFF-NEXT:    movw %r15w, 6(%rbx)
; BWOFF-NEXT:    movw %r14w, 2(%rbx)
; BWOFF-NEXT:    addq $40, %rsp
; BWOFF-NEXT:    popq %rbx
; BWOFF-NEXT:    popq %r14
; BWOFF-NEXT:    popq %r15
; BWOFF-NEXT:    popq %rbp
; BWOFF-NEXT:    retq
;
; BWON-F16C-LABEL: test_trunc64_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    pushq %rbp
; BWON-F16C-NEXT:    pushq %r15
; BWON-F16C-NEXT:    pushq %r14
; BWON-F16C-NEXT:    pushq %rbx
; BWON-F16C-NEXT:    subq $88, %rsp
; BWON-F16C-NEXT:    movq %rdi, %rbx
; BWON-F16C-NEXT:    vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; BWON-F16C-NEXT:    vzeroupper
; BWON-F16C-NEXT:    callq __truncdfhf2
; BWON-F16C-NEXT:    movl %eax, %r14d
; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; BWON-F16C-NEXT:    vextractf128 $1, %ymm0, %xmm0
; BWON-F16C-NEXT:    vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; BWON-F16C-NEXT:    vzeroupper
; BWON-F16C-NEXT:    callq __truncdfhf2
; BWON-F16C-NEXT:    movl %eax, %r15d
; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; BWON-F16C-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; BWON-F16C-NEXT:    vzeroupper
; BWON-F16C-NEXT:    callq __truncdfhf2
; BWON-F16C-NEXT:    movl %eax, %ebp
; BWON-F16C-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWON-F16C-NEXT:    callq __truncdfhf2
; BWON-F16C-NEXT:    movw %ax, 4(%rbx)
; BWON-F16C-NEXT:    movw %bp, (%rbx)
; BWON-F16C-NEXT:    movw %r15w, 6(%rbx)
; BWON-F16C-NEXT:    movw %r14w, 2(%rbx)
; BWON-F16C-NEXT:    addq $88, %rsp
; BWON-F16C-NEXT:    popq %rbx
; BWON-F16C-NEXT:    popq %r14
; BWON-F16C-NEXT:    popq %r15
; BWON-F16C-NEXT:    popq %rbp
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc64_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %ebp
; CHECK-I686-NEXT:    pushl %ebx
; CHECK-I686-NEXT:    pushl %edi
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $60, %esp
; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, %si
; CHECK-I686-NEXT:    movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movhpd %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, %di
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, %bx
; CHECK-I686-NEXT:    movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movhpd %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, 6(%ebp)
; CHECK-I686-NEXT:    movw %bx, 4(%ebp)
; CHECK-I686-NEXT:    movw %di, 2(%ebp)
; CHECK-I686-NEXT:    movw %si, (%ebp)
; CHECK-I686-NEXT:    addl $60, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    popl %edi
; CHECK-I686-NEXT:    popl %ebx
; CHECK-I686-NEXT:    popl %ebp
; CHECK-I686-NEXT:    retl
  %v = fptrunc <4 x double> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

declare float @test_floatret();

; On i686, if SSE2 is available, the return value from test_floatret is loaded
; to f80 and then rounded to f32.  The DAG combiner should not combine this
; fp_round and the subsequent fptrunc from float to half.
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    callq test_floatret
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    popq %rax
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    pushq %rax
; BWON-F16C-NEXT:    callq test_floatret
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    popq %rax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    calll test_floatret
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movzwl %ax, %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  ; Narrow the callee's float result to half. In the expected i686 output the
  ; x87 return value is first stored as a 32-bit float (fstps) and only then
  ; passed to the float-to-half libcall, i.e. the two roundings stay separate.
  %wide = call float @test_floatret()
  %narrow = fptrunc float %wide to half
  ret half %narrow
}

; Convert an i32 to half, add it to a half loaded from memory, and return the
; sum extended to float. In every expected output below the converted integer
; takes a float -> half -> float round trip (__gnu_f2h_ieee + __gnu_h2f_ieee,
; or vcvtps2ph + vcvtph2ps with F16C) before the single-precision add.
define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    subq $16, %rsp
; CHECK-LIBCALL-NEXT:    movl %edi, %ebx
; CHECK-LIBCALL-NEXT:    movzwl (%rsi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    cvtsi2ssl %ebx, %xmm0
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-LIBCALL-NEXT:    addq $16, %rsp
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_sitofp_fadd_i32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rsi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtsi2ssl %edi, %xmm1, %xmm1
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_sitofp_fadd_i32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $28, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movzwl %ax, %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    addl $28, %esp
; CHECK-I686-NEXT:    retl
  ; Instruction order is kept exactly as before; only the value names differ.
  %lhs = load half, half* %b
  %rhs = sitofp i32 %a to half
  %sum = fadd half %lhs, %rhs
  %ext = fpext half %sum to float
  ret float %ext
}

; Shared by every test function in this file.
attributes #0 = { nounwind }