Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
      3 
      4 ; GCN-LABEL: {{^}}br_cc_f16:
      5 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
      6 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
      7 
      8 ; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
      9 ; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
     10 ; SI:  v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
     11 ; VI:  v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
     12 ; GCN: s_cbranch_vccnz
     13 
     14 ; SI: one{{$}}
     15 ; SI: v_cvt_f16_f32_e32 v[[CVT:[0-9]+]], v[[A_F32]]
     16 
     17 ; SI: two{{$}}
     18 ; SI:  v_cvt_f16_f32_e32 v[[CVT]], v[[B_F32]]
     19 
     20 ; SI: one{{$}}
     21 ; SI: buffer_store_short v[[CVT]]
     22 ; SI: s_endpgm
     23 
     24 
     25 
     26 ; VI: one{{$}}
     27 ; VI: buffer_store_short v[[A_F16]]
     28 ; VI: s_endpgm
     29 
     30 ; VI: two{{$}}
     31 ; VI: buffer_store_short v[[B_F16]]
     32 ; VI: s_endpgm
     33 define amdgpu_kernel void @br_cc_f16( ; Kernel: compare two loaded halves with "a < b", branch on the result, and store the selected operand to %r.
     34     half addrspace(1)* %r, ; destination of the half stored by whichever block runs
     35     half addrspace(1)* %a, ; address the first compare operand is loaded from
     36     half addrspace(1)* %b) { ; address the second compare operand is loaded from
     37 entry:
     38   %a.val = load volatile half, half addrspace(1)* %a ; NOTE(review): presumably volatile so both loads stay in the order the expected output lists them - confirm
     39   %b.val = load volatile half, half addrspace(1)* %b
     40   %fcmp = fcmp olt half %a.val, %b.val ; ordered less-than: yields false when either operand is NaN
     41   br i1 %fcmp, label %one, label %two ; true -> %one, false -> %two
     42 
     43 one: ; reached when %a.val < %b.val
     44   store half %a.val, half addrspace(1)* %r
     45   ret void
     46 
     47 two: ; reached when the ordered compare is false
     48   store half %b.val, half addrspace(1)* %r
     49   ret void
     50 }
     51 
     52 ; GCN-LABEL: {{^}}br_cc_f16_imm_a:
     53 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
     54 
     55 ; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
     56 ; SI:  v_cmp_nlt_f32_e32 vcc, 0.5, v[[B_F32]]
     57 ; SI: s_cbranch_vccnz
     58 
     59 ; VI:  v_cmp_nlt_f16_e32 vcc, 0.5, v[[B_F16]]
     60 ; VI: s_cbranch_vccnz
     61 
     62 ; GCN: one{{$}}
     63 ; GCN: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
     64 
     65 ; SI: buffer_store_short v[[A_F16]]
     66 ; SI: s_endpgm
     67 
     68 
     69 ; GCN: two{{$}}
     70 ; SI:  v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
     71 
     72 define amdgpu_kernel void @br_cc_f16_imm_a( ; Kernel: same compare-and-branch shape, but the left-hand compare operand is the half constant 0xH3800.
     73     half addrspace(1)* %r, ; destination of the half stored by whichever block runs
     74     half addrspace(1)* %b) { ; address the right-hand compare operand is loaded from
     75 entry:
     76   %b.val = load half, half addrspace(1)* %b
     77   %fcmp = fcmp olt half 0xH3800, %b.val ; 0xH3800 is 0.5 in half precision (the expected compare uses the literal 0.5); ordered, so false on NaN
     78   br i1 %fcmp, label %one, label %two ; true -> %one, false -> %two
     79 
     80 one: ; reached when 0.5 < %b.val; stores the constant itself
     81   store half 0xH3800, half addrspace(1)* %r
     82   ret void
     83 
     84 two: ; reached when the ordered compare is false; stores the loaded value
     85   store half %b.val, half addrspace(1)* %r
     86   ret void
     87 }
     88 
     89 ; GCN-LABEL: {{^}}br_cc_f16_imm_b:
     90 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
     91 
     92 ; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
     93 ; SI:  v_cmp_ngt_f32_e32 vcc, 0.5, v[[A_F32]]
     94 
     95 ; VI:  v_cmp_ngt_f16_e32 vcc, 0.5, v[[A_F16]]
     96 ; GCN: s_cbranch_vccnz
     97 
     98 ; GCN: one{{$}}
     99 ; SI:  v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
    100 
    101 ; GCN: two{{$}}
    102 ; GCN:  v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
    103 ; GCN: buffer_store_short v[[B_F16]]
    104 ; GCN: s_endpgm
    105 define amdgpu_kernel void @br_cc_f16_imm_b( ; Kernel: same compare-and-branch shape, but the right-hand compare operand is the half constant 0xH3800.
    106     half addrspace(1)* %r, ; destination of the half stored by whichever block runs
    107     half addrspace(1)* %a) { ; address the left-hand compare operand is loaded from
    108 entry:
    109   %a.val = load half, half addrspace(1)* %a
    110   %fcmp = fcmp olt half %a.val, 0xH3800 ; constant is on the right here; the expected compare places 0.5 first and uses the "ngt" form instead
    111   br i1 %fcmp, label %one, label %two ; true -> %one, false -> %two
    112 
    113 one: ; reached when %a.val < 0.5; stores the loaded value
    114   store half %a.val, half addrspace(1)* %r
    115   ret void
    116 
    117 two: ; reached when the ordered compare is false; stores the constant 0xH3800
    118   store half 0xH3800, half addrspace(1)* %r
    119   ret void
    120 }
    121