Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
      2 
      3 declare half @llvm.fabs.f16(half %a)
      4 declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)
      5 
      6 ; GCN-LABEL: {{^}}class_f16:
      7 ; GCN-DAG: buffer_load_ushort v[[A_F16:[0-9]+]]
      8 ; GCN-DAG: buffer_load_dword v[[B_I32:[0-9]+]]
      9 ; VI:  v_cmp_class_f16_e32 vcc, v[[A_F16]], v[[B_I32]]
     10 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]], 0, -1, vcc
     11 ; GCN: buffer_store_dword v[[R_I32]]
     12 ; GCN: s_endpgm
        ;
        ; Variable value, variable mask. Both operands are loaded through
        ; addrspace(1) pointers, classified with llvm.amdgcn.class.f16, and the
        ; i1 result is sign-extended (true -> -1, false -> 0) before being
        ; stored to %r. The checks above expect the e32 compare writing vcc,
        ; followed by a select of 0 / -1 on vcc whose result feeds the store.
     13 define amdgpu_kernel void @class_f16(
     14   i32 addrspace(1)* %r,
     15   half addrspace(1)* %a,
     16   i32 addrspace(1)* %b) {
     17 entry:
     18   %a.val = load half, half addrspace(1)* %a
     19   %b.val = load i32, i32 addrspace(1)* %b
     20   %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 %b.val)
     21   %r.val.sext = sext i1 %r.val to i32
     22   store i32 %r.val.sext, i32 addrspace(1)* %r
     23   ret void
     24 }
     25 
     26 ; GCN-LABEL: {{^}}class_f16_fabs:
     27 ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
     28 ; GCN: s_load_dword s[[SB_I32:[0-9]+]]
     29 ; GCN: v_mov_b32_e32 [[V_B_I32:v[0-9]+]], s[[SB_I32]]
     30 ; VI:  v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |s[[SA_F16]]|, [[V_B_I32]]
     31 ; VI:  v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
     32 ; GCN: buffer_store_dword v[[VR_I32]]
     33 ; GCN: s_endpgm
        ;
        ; fabs applied to the classified value. %a.val and %b.val are by-value
        ; kernel arguments separated by unnamed [8 x i32] arguments (presumably
        ; padding so that each one gets its own s_load_dword -- confirm). The
        ; checks above expect the mask to be copied from its SGPR into a VGPR
        ; and the fabs to appear as the |...| modifier on the compare's SGPR
        ; source operand.
     34 define amdgpu_kernel void @class_f16_fabs(
     35   i32 addrspace(1)* %r,
     36   [8 x i32],
     37   half %a.val,
     38   [8 x i32],
     39   i32 %b.val) {
     40 entry:
     41   %abs = call half @llvm.fabs.f16(half %a.val)
     42   %cls = call i1 @llvm.amdgcn.class.f16(half %abs, i32 %b.val)
     43   %cls.ext = sext i1 %cls to i32
     44   store i32 %cls.ext, i32 addrspace(1)* %r
     45   ret void
     46 }
     47 
     48 ; GCN-LABEL: {{^}}class_f16_fneg:
     49 ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
     50 ; GCN: s_load_dword s[[SB_I32:[0-9]+]]
     51 ; GCN: v_mov_b32_e32 [[V_B_I32:v[0-9]+]], s[[SB_I32]]
     52 ; VI:  v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -s[[SA_F16]], [[V_B_I32]]
     53 ; VI:  v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
     54 ; GCN: buffer_store_dword v[[VR_I32]]
     55 ; GCN: s_endpgm
        ;
        ; Negated classified value. The negation is written as an fsub from
        ; -0.0; the checks above expect it to appear as a leading '-' modifier
        ; on the compare's SGPR source operand, with the mask copied from its
        ; SGPR into a VGPR. The unnamed [8 x i32] arguments presumably pad the
        ; two by-value kernel arguments apart -- confirm.
     56 define amdgpu_kernel void @class_f16_fneg(
     57   i32 addrspace(1)* %r,
     58   [8 x i32],
     59   half %a.val,
     60   [8 x i32],
     61   i32 %b.val) {
     62 entry:
     63   %neg = fsub half -0.0, %a.val
     64   %cls = call i1 @llvm.amdgcn.class.f16(half %neg, i32 %b.val)
     65   %cls.ext = sext i1 %cls to i32
     66   store i32 %cls.ext, i32 addrspace(1)* %r
     67   ret void
     68 }
     69 
     70 ; GCN-LABEL: {{^}}class_f16_fabs_fneg:
     71 ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
     72 ; GCN: s_load_dword s[[SB_I32:[0-9]+]]
     73 ; GCN: v_mov_b32_e32 [[V_B_I32:v[0-9]+]], s[[SB_I32]]
     74 ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|s[[SA_F16]]|, [[V_B_I32]]
     75 ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
     76 ; GCN: buffer_store_dword v[[VR_I32]]
     77 ; GCN: s_endpgm
        ;
        ; fabs followed by negation on the classified value (the negation is an
        ; fsub from -0.0 applied to the fabs result). The checks above expect
        ; both to appear together as the -|...| modifier on the compare's SGPR
        ; source operand. The unnamed [8 x i32] arguments presumably pad the
        ; two by-value kernel arguments apart -- confirm.
     78 define amdgpu_kernel void @class_f16_fabs_fneg(
     79   i32 addrspace(1)* %r,
     80   [8 x i32],
     81   half %a.val,
     82   [8 x i32],
     83   i32 %b.val) {
     84 entry:
     85   %abs = call half @llvm.fabs.f16(half %a.val)
     86   %neg.abs = fsub half -0.0, %abs
     87   %cls = call i1 @llvm.amdgcn.class.f16(half %neg.abs, i32 %b.val)
     88   %cls.ext = sext i1 %cls to i32
     89   store i32 %cls.ext, i32 addrspace(1)* %r
     90   ret void
     91 }
     92 
     93 ; GCN-LABEL: {{^}}class_f16_1:
     94 ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
     95 ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s[[SA_F16]], 1{{$}}
     96 ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
     97 ; GCN: buffer_store_dword v[[VR_I32]]
     98 ; GCN: s_endpgm
        ;
        ; Constant mask 1 with a by-value kernel argument as the classified
        ; value. The checks above expect the value in an SGPR and the literal 1
        ; as the final operand of the e64 compare, whose SGPR-pair result then
        ; drives a select of 0 / -1 that is stored to %r.
     99 define amdgpu_kernel void @class_f16_1(
    100   i32 addrspace(1)* %r,
    101   half %a.val) {
    102 entry:
    103   %cls = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1)
    104   %cls.ext = sext i1 %cls to i32
    105   store i32 %cls.ext, i32 addrspace(1)* %r
    106   ret void
    107 }
    108 
    109 ; GCN-LABEL: {{^}}class_f16_64:
    110 ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
    111 ; VI:  v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s[[SA_F16]], 64{{$}}
    112 ; VI:  v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
    113 ; GCN: buffer_store_dword v[[VR_I32]]
    114 ; GCN: s_endpgm
        ;
        ; Constant mask 64 with a by-value kernel argument as the classified
        ; value. The checks above expect the literal 64 as the final operand of
        ; the e64 compare, whose SGPR-pair result drives a select of 0 / -1
        ; that is stored to %r. The label pattern ends in ':' so that it
        ; anchors on the complete symbol name rather than on a name prefix.
    115 define amdgpu_kernel void @class_f16_64(
    116   i32 addrspace(1)* %r,
    117   half %a.val) {
    118 entry:
    119   %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 64)
    120   %r.val.sext = sext i1 %r.val to i32
    121   store i32 %r.val.sext, i32 addrspace(1)* %r
    122   ret void
    123 }
    124 
    125 ; GCN-LABEL: {{^}}class_f16_full_mask:
    126 ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
    127 ; VI:  v_mov_b32_e32 v[[MASK:[0-9]+]], 0x3ff{{$}}
    128 ; VI:  v_cmp_class_f16_e32 vcc, s[[SA_F16]], v[[MASK]]
    129 ; VI:  v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
    130 ; GCN: buffer_store_dword v[[VR_I32]]
    131 ; GCN: s_endpgm
        ;
        ; Constant mask 1023 (0x3ff, i.e. the ten low bits set). The checks
        ; above expect the mask to be materialised in a VGPR with v_mov_b32 and
        ; the e32 compare form writing vcc, with the select of 0 / -1 then
        ; reading vcc before the store to %r.
    132 define amdgpu_kernel void @class_f16_full_mask(
    133   i32 addrspace(1)* %r,
    134   half %a.val) {
    135 entry:
    136   %cls = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1023)
    137   %cls.ext = sext i1 %cls to i32
    138   store i32 %cls.ext, i32 addrspace(1)* %r
    139   ret void
    140 }
    141 
    142 ; GCN-LABEL: {{^}}class_f16_nine_bit_mask:
    143 ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
    144 ; VI:  v_mov_b32_e32 v[[MASK:[0-9]+]], 0x1ff{{$}}
    145 ; VI:  v_cmp_class_f16_e32 vcc, s[[SA_F16]], v[[MASK]]
    146 ; VI:  v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
    147 ; GCN: buffer_store_dword v[[VR_I32]]
    148 ; GCN: s_endpgm
        ;
        ; Constant mask 511 (0x1ff, i.e. the nine low bits set). The checks
        ; above expect the mask to be materialised in a VGPR with v_mov_b32 and
        ; the e32 compare form writing vcc, with the select of 0 / -1 then
        ; reading vcc before the store to %r.
    149 define amdgpu_kernel void @class_f16_nine_bit_mask(
    150   i32 addrspace(1)* %r,
    151   half %a.val) {
    152 entry:
    153   %cls = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 511)
    154   %cls.ext = sext i1 %cls to i32
    155   store i32 %cls.ext, i32 addrspace(1)* %r
    156   ret void
    157 }
    158