Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
      2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      3 
      4 ; GCN-LABEL: {{^}}sitofp_i16_to_f16:
      5 ; GCN: buffer_load_{{sshort|ushort}} v[[A_I16:[0-9]+]]
      6 ; GCN: v_cvt_f32_i32_e32 v[[A_F32:[0-9]+]], v[[A_I16]]
      7 ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
      8 ; GCN: buffer_store_short v[[R_F16]]
      9 ; GCN: s_endpgm
     10 define amdgpu_kernel void @sitofp_i16_to_f16(  ; scalar case, one i16 converted to one half
     11     half addrspace(1)* %r,                     ; pointer the half result is stored through
     12     i16 addrspace(1)* %a) {                    ; pointer the i16 input is loaded from
     13 entry:
     14   %a.val = load i16, i16 addrspace(1)* %a
     15   %r.val = sitofp i16 %a.val to half           ; conversion under test; the checks above expect an int-to-f32 cvt feeding an f32-to-f16 cvt
     16   store half %r.val, half addrspace(1)* %r
     17   ret void
     18 }
     19 
     20 ; GCN-LABEL: {{^}}sitofp_i32_to_f16:
     21 ; GCN: buffer_load_dword v[[A_I32:[0-9]+]]
     22 ; GCN: v_cvt_f32_i32_e32 v[[A_F32:[0-9]+]], v[[A_I32]]
     23 ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
     24 ; GCN: buffer_store_short v[[R_F16]]
     25 ; GCN: s_endpgm
     26 define amdgpu_kernel void @sitofp_i32_to_f16(  ; scalar case, one i32 converted to one half
     27     half addrspace(1)* %r,                     ; pointer the half result is stored through
     28     i32 addrspace(1)* %a) {                    ; pointer the i32 input is loaded from
     29 entry:
     30   %a.val = load i32, i32 addrspace(1)* %a
     31   %r.val = sitofp i32 %a.val to half           ; conversion under test; the checks above expect an int-to-f32 cvt feeding an f32-to-f16 cvt
     32   store half %r.val, half addrspace(1)* %r
     33   ret void
     34 }
     35 
     36 ; f16 = sitofp i64 is in sint_to_fp.i64.ll
     37 
     38 ; GCN-LABEL: {{^}}sitofp_v2i16_to_v2f16:
     39 ; GCN:     buffer_load_dword
     40 
     41 ; SI: v_cvt_f32_i32_e32
     42 ; SI: v_cvt_f32_i32_e32
     43 ; SI: v_cvt_f16_f32_e32
     44 ; SI: v_cvt_f16_f32_e32
     45 ; SI-DAG: v_lshlrev_b32_e32
     46 ; SI: v_or_b32_e32
     47 
     48 ; VI-DAG: v_cvt_f32_i32_sdwa
     49 ; VI-DAG: v_cvt_f32_i32_sdwa
     50 ; VI-DAG: v_cvt_f16_f32_e32
     51 ; VI-DAG: v_cvt_f16_f32_sdwa
     52 ; VI:     v_or_b32_e32
     53 
     54 ; GCN: buffer_store_dword
     55 ; GCN: s_endpgm
     56 
     57 define amdgpu_kernel void @sitofp_v2i16_to_v2f16(  ; vector case, two i16 elements converted to two halves
     58     <2 x half> addrspace(1)* %r,                   ; pointer the <2 x half> result is stored through
     59     <2 x i16> addrspace(1)* %a) {                  ; pointer the <2 x i16> input is loaded from
     60 entry:
     61   %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
     62   %r.val = sitofp <2 x i16> %a.val to <2 x half>   ; conversion under test; the checks above expect two cvt pairs joined by an or before one dword store
     63   store <2 x half> %r.val, <2 x half> addrspace(1)* %r
     64   ret void
     65 }
     66 
     67 ; GCN-LABEL: {{^}}sitofp_v2i32_to_v2f16:
     68 ; GCN:    buffer_load_dwordx2
     69 
     70 ; SI: v_cvt_f32_i32_e32
     71 ; SI: v_cvt_f32_i32_e32
     72 ; SI: v_cvt_f16_f32_e32
     73 ; SI: v_cvt_f16_f32_e32
     74 ; SI-DAG: v_lshlrev_b32_e32
     75 ; SI: v_or_b32_e32
     76 
     77 ; VI-DAG: v_cvt_f32_i32_e32
     78 ; VI-DAG: v_cvt_f32_i32_e32
     79 ; VI-DAG: v_cvt_f16_f32_e32
     80 ; VI-DAG: v_cvt_f16_f32_sdwa
     81 ; VI:     v_or_b32_e32
     82 
     83 ; GCN: buffer_store_dword
     84 ; GCN: s_endpgm
     85 define amdgpu_kernel void @sitofp_v2i32_to_v2f16(  ; vector case, two i32 elements converted to two halves
     86     <2 x half> addrspace(1)* %r,                   ; pointer the <2 x half> result is stored through
     87     <2 x i32> addrspace(1)* %a) {                  ; pointer the <2 x i32> input is loaded from
     88 entry:
     89   %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a
     90   %r.val = sitofp <2 x i32> %a.val to <2 x half>   ; conversion under test; the checks above expect two cvt pairs joined by an or before one dword store
     91   store <2 x half> %r.val, <2 x half> addrspace(1)* %r
     92   ret void
     93 }
     94 
     95 ; v2f16 = sitofp v2i64 is in sint_to_fp.i64.ll
     96