Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      3 
      4 ; GCN-LABEL: {{^}}fold_mi_v_and_0:
      5 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
      6 ; GCN-NOT: [[RESULT]]
      7 ; GCN: buffer_store_dword [[RESULT]]
      8 define amdgpu_kernel void @fold_mi_v_and_0(i32 addrspace(1)* %out) {
      9   %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
     10   %size = call i32 @llvm.amdgcn.groupstaticsize()
     11   %and = and i32 %size, %x
     12   store i32 %and, i32 addrspace(1)* %out
     13   ret void
     14 }
     15 
     16 ; GCN-LABEL: {{^}}fold_mi_s_and_0:
     17 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
     18 ; GCN-NOT: [[RESULT]]
     19 ; GCN: buffer_store_dword [[RESULT]]
     20 define amdgpu_kernel void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
     21   %size = call i32 @llvm.amdgcn.groupstaticsize()
     22   %and = and i32 %size, %x
     23   store i32 %and, i32 addrspace(1)* %out
     24   ret void
     25 }
     26 
     27 ; GCN-LABEL: {{^}}fold_mi_v_or_0:
     28 ; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
     29 ; GCN-NOT: [[RESULT]]
     30 ; GCN: buffer_store_dword [[RESULT]]
     31 define amdgpu_kernel void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
     32   %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
     33   %size = call i32 @llvm.amdgcn.groupstaticsize()
     34   %or = or i32 %size, %x
     35   store i32 %or, i32 addrspace(1)* %out
     36   ret void
     37 }
     38 
     39 ; GCN-LABEL: {{^}}fold_mi_s_or_0:
     40 ; GCN: s_load_dword [[SVAL:s[0-9]+]]
     41 ; GCN-NOT: [[SVAL]]
     42 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
     43 ; GCN-NOT: [[VVAL]]
     44 ; GCN: buffer_store_dword [[VVAL]]
     45 define amdgpu_kernel void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
     46   %size = call i32 @llvm.amdgcn.groupstaticsize()
     47   %or = or i32 %size, %x
     48   store i32 %or, i32 addrspace(1)* %out
     49   ret void
     50 }
     51 
     52 ; GCN-LABEL: {{^}}fold_mi_v_xor_0:
     53 ; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
     54 ; GCN-NOT: [[RESULT]]
     55 ; GCN: buffer_store_dword [[RESULT]]
     56 define amdgpu_kernel void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
     57   %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
     58   %size = call i32 @llvm.amdgcn.groupstaticsize()
     59   %xor = xor i32 %size, %x
     60   store i32 %xor, i32 addrspace(1)* %out
     61   ret void
     62 }
     63 
     64 ; GCN-LABEL: {{^}}fold_mi_s_xor_0:
     65 ; GCN: s_load_dword [[SVAL:s[0-9]+]]
     66 ; GCN-NOT: [[SVAL]]
     67 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
     68 ; GCN-NOT: [[VVAL]]
     69 ; GCN: buffer_store_dword [[VVAL]]
     70 define amdgpu_kernel void @fold_mi_s_xor_0(i32 addrspace(1)* %out, i32 %x) #0 {
     71   %size = call i32 @llvm.amdgcn.groupstaticsize()
     72   %xor = xor i32 %size, %x
     73   store i32 %xor, i32 addrspace(1)* %out
     74   ret void
     75 }
     76 
     77 ; GCN-LABEL: {{^}}fold_mi_s_not_0:
     78 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], -1{{$}}
     79 ; GCN-NOT: [[RESULT]]
     80 ; GCN: buffer_store_dword [[RESULT]]
     81 define amdgpu_kernel void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
     82   %size = call i32 @llvm.amdgcn.groupstaticsize()
     83   %xor = xor i32 %size, -1
     84   store i32 %xor, i32 addrspace(1)* %out
     85   ret void
     86 }
     87 
     88 ; GCN-LABEL: {{^}}fold_mi_v_not_0:
     89 ; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
     90 ; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
     91 ; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
     92 ; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
     93 ; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
     94 define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
     95   %vreg = load volatile i64, i64 addrspace(1)* undef
     96   %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg)
     97   %xor = xor i64 %ctpop, -1
     98   store i64 %xor, i64 addrspace(1)* %out
     99   ret void
    100 }
    101 
    102 ; The neg1 appears after folding the not 0
    103 ; GCN-LABEL: {{^}}fold_mi_or_neg1:
    104 ; GCN: buffer_load_dwordx2
    105 ; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}
    106 
    107 ; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
    108 ; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
    109 ; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
    110 ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]], v[[VREG1_LO]]
    111 ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
    112 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
    113 define amdgpu_kernel void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
    114   %vreg0 = load volatile i64, i64 addrspace(1)* undef
    115   %vreg1 = load volatile i64, i64 addrspace(1)* undef
    116   %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
    117   %xor = xor i64 %ctpop, -1
    118   %or = or i64 %xor, %vreg1
    119   store i64 %or, i64 addrspace(1)* %out
    120   ret void
    121 }
    122 
    123 ; GCN-LABEL: {{^}}fold_mi_and_neg1:
    124 ; GCN: v_bcnt_u32_b32
    125 ; GCN: v_bcnt_u32_b32
    126 ; GCN: v_not_b32
    127 ; GCN: v_and_b32
    128 ; GCN-NOT: v_and_b32
    129 define amdgpu_kernel void @fold_mi_and_neg1(i64 addrspace(1)* %out) {
    130   %vreg0 = load volatile i64, i64 addrspace(1)* undef
    131   %vreg1 = load volatile i64, i64 addrspace(1)* undef
    132   %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
    133   %xor = xor i64 %ctpop, -1
    134   %and = and i64 %xor, %vreg1
    135   store i64 %and, i64 addrspace(1)* %out
    136   ret void
    137 }
    138 
    139 declare i64 @llvm.ctpop.i64(i64) #1
    140 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
    141 declare i32 @llvm.amdgcn.groupstaticsize() #1
    142 
    143 attributes #0 = { nounwind }
    144 attributes #1 = { nounwind readnone }
    145