Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
      4 
      5 ; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32:
      6 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[SX:[0-9]+]]:[[SY:[0-9]+]]{{\]}}, s[0:1], 0x{{b|2c}}
      7 ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[SY]]
      8 ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, s[[SX]], [[VY]]
      9 ; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, s[[SX]], [[VY]]
        ; Scalar-input case: both floats are kernel arguments. The checks above
        ; expect the pair to be fetched by a single s_load_dwordx2, the second
        ; value to be copied into a VGPR, and the first value to be used straight
        ; from its SGPR as the first source of the conversion.
        ; NOTE(review): presumably only one SGPR source can be encoded here, which
        ; would explain the v_mov_b32 copy - confirm against the ISA documentation.
     10 define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 {
     11   %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y) ; convert the two floats into one <2 x half>
     12   store <2 x half> %result, <2 x half> addrspace(1)* %out ; write the converted pair to %out
     13   ret void
     14 }
     15 
     16 ; GCN-LABEL: {{^}}s_cvt_pkrtz_samereg_v2f16_f32:
     17 ; GCN: s_load_dword [[X:s[0-9]+]]
     18 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
        ; Same-register case: one float kernel argument feeds both inputs of the
        ; intrinsic. The checks above expect a single s_load_dword and the loaded
        ; SGPR to appear as both sources of the conversion, with no VGPR copy.
        ; The mnemonic pattern accepts an optional _e64 suffix.
     19 define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float %x) #0 {
     20   %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x) ; %x is passed as both operands
     21   store <2 x half> %result, <2 x half> addrspace(1)* %out ; write the converted pair to %out
     22   ret void
     23 }
     24 
     25 ; GCN-LABEL: {{^}}s_cvt_pkrtz_undef_undef:
     26 ; GCN-NEXT: ; %bb.0
     27 ; GCN-NEXT: s_endpgm
        ; Undef-input case: both intrinsic operands are undef. The checks above
        ; require the entry-block comment to be followed immediately by s_endpgm,
        ; i.e. no conversion and no store instruction may be emitted.
     28 define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out) #0 {
     29   %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef) ; both inputs are undef
     30   store <2 x half> %result, <2 x half> addrspace(1)* %out ; expected to be dropped from the output
     31   ret void
     32 }
     33 
     34 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32:
     35 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
     36 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
     37 ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
     38 ; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[A]], [[B]]
        ; Vector-input case: both floats are loaded from memory at an offset taken
        ; from the work-item id. The checks above expect two dword loads into
        ; VGPRs (buffer, flat or global form) and the conversion to consume them
        ; in the same order as the intrinsic operands.
        ; NOTE(review): the loads are volatile, presumably to keep them as two
        ; separate loads in program order so A and B bind predictably - confirm.
     39 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
     40   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     41   %tid.ext = sext i32 %tid to i64 ; widen the id to the i64 index type used by the GEPs
     42   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
     43   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
     44   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
     45   %a = load volatile float, float addrspace(1)* %a.gep ; first operand
     46   %b = load volatile float, float addrspace(1)* %b.gep ; second operand
     47   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b)
     48   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
     49   ret void
     50 }
     51 
     52 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_reg_imm:
     53 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
     54 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
        ; Register + immediate case: the first operand is loaded from memory and
        ; the second is the constant 1.0. The checks above expect 1.0 to appear
        ; literally as the second source of the conversion; the mnemonic pattern
        ; accepts an optional _e64 suffix.
     55 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
     56   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     57   %tid.ext = sext i32 %tid to i64 ; widen the id to the i64 index type used by the GEPs
     58   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
     59   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
     60   %a = load volatile float, float addrspace(1)* %a.gep
     61   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float 1.0) ; constant in the second position
     62   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
     63   ret void
     64 }
     65 
     66 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg:
     67 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
     68 ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
     69 ; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, 1.0, [[A]]
        ; Immediate + register case: the first operand is the constant 1.0 and the
        ; second is loaded from memory. The checks above expect 1.0 to appear
        ; literally as the first source and the loaded VGPR as the second.
     70 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
     71   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     72   %tid.ext = sext i32 %tid to i64 ; widen the id to the i64 index type used by the GEPs
     73   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
     74   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
     75   %a = load volatile float, float addrspace(1)* %a.gep
     76   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 1.0, float %a) ; constant in the first position
     77   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
     78   ret void
     79 }
     80 
     81 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo:
     82 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
     83 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
     84 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
        ; Negated first operand: %a is negated in IR before the call. The checks
        ; above expect the negation to show up as a "-" modifier on the first
        ; source of the conversion, applied to the loaded register itself, while
        ; the second source is used unmodified.
     85 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
     86   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     87   %tid.ext = sext i32 %tid to i64 ; widen the id to the i64 index type used by the GEPs
     88   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
     89   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
     90   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
     91   %a = load volatile float, float addrspace(1)* %a.gep
     92   %b = load volatile float, float addrspace(1)* %b.gep
     93   %neg.a = fsub float -0.0, %a ; negate %a, written as a subtraction from -0.0
     94   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %b)
     95   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
     96   ret void
     97 }
     98 
     99 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_hi:
    100 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
    101 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
    102 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
        ; Negated second operand: %b is negated in IR before the call. The checks
        ; above expect the negation to show up as a "-" modifier on the second
        ; source of the conversion, applied to the loaded register itself, while
        ; the first source is used unmodified.
    103 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
    104   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    105   %tid.ext = sext i32 %tid to i64 ; widen the id to the i64 index type used by the GEPs
    106   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
    107   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
    108   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
    109   %a = load volatile float, float addrspace(1)* %a.gep
    110   %b = load volatile float, float addrspace(1)* %b.gep
    111   %neg.b = fsub float -0.0, %b ; negate %b, written as a subtraction from -0.0
    112   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %neg.b)
    113   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
    114   ret void
    115 }
    116 
    117 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo_hi:
    118 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
    119 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
    120 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
        ; Both operands negated: %a and %b are each negated in IR before the call.
        ; The checks above expect a "-" modifier on both sources of the
        ; conversion, each applied directly to its loaded register.
    121 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
    122   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    123   %tid.ext = sext i32 %tid to i64 ; widen the id to the i64 index type used by the GEPs
    124   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
    125   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
    126   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
    127   %a = load volatile float, float addrspace(1)* %a.gep
    128   %b = load volatile float, float addrspace(1)* %b.gep
    129   %neg.a = fsub float -0.0, %a ; negate %a, written as a subtraction from -0.0
    130   %neg.b = fsub float -0.0, %b ; negate %b the same way
    131   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %neg.b)
    132   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
    133   ret void
    134 }
    135 
    136 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi:
    137 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
    138 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
    139 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
        ; Combined modifiers: the first operand is the negated absolute value of
        ; %a and the second is the negation of %b. The checks above expect the
        ; first source to carry both modifiers ("-|reg|") and the second to carry
        ; "-", each applied directly to its loaded register.
    140 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
    141   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    142   %tid.ext = sext i32 %tid to i64 ; widen the id to the i64 index type used by the GEPs
    143   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
    144   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
    145   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
    146   %a = load volatile float, float addrspace(1)* %a.gep
    147   %b = load volatile float, float addrspace(1)* %b.gep
    148   %fabs.a = call float @llvm.fabs.f32(float %a) ; absolute value of %a
    149   %neg.fabs.a = fsub float -0.0, %fabs.a ; then negate it (subtraction from -0.0)
    150   %neg.b = fsub float -0.0, %b ; negate %b the same way
    151   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.fabs.a, float %neg.b)
    152   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
    153   ret void
    154 }
    155 
    156 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 ; intrinsic under test: two floats in, one <2 x half> out
    157 declare float @llvm.fabs.f32(float) #1 ; used by the fneg+fabs test
    158 declare i32 @llvm.amdgcn.workitem.id.x() #1 ; index source for the getelementptrs in the v_* tests
    159 
    160 
    161 attributes #0 = { nounwind } ; attached to the kernel definitions above
    162 attributes #1 = { nounwind readnone } ; attached to the three intrinsic declarations above
    163