; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Tests that constant-foldable machine-instruction operands (and/or/xor
; with 0 or -1) are folded away during instruction selection/folding.
; @llvm.amdgcn.groupstaticsize lowers to 0 here (no LDS is used), which
; is what makes each bitwise op foldable.

; x & 0 folds to a materialized 0; no v_and should survive.
; GCN-LABEL: {{^}}fold_mi_v_and_0:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_and_0(i32 addrspace(1)* %out) {
  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %and = and i32 %size, %x
  store i32 %and, i32 addrspace(1)* %out
  ret void
}

; Same fold with a scalar (SGPR) operand.
; GCN-LABEL: {{^}}fold_mi_s_and_0:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %and = and i32 %size, %x
  store i32 %and, i32 addrspace(1)* %out
  ret void
}

; x | 0 folds to x; the mbcnt result is stored directly.
; GCN-LABEL: {{^}}fold_mi_v_or_0:
; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %or = or i32 %size, %x
  store i32 %or, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fold_mi_s_or_0:
; GCN: s_load_dword [[SVAL:s[0-9]+]]
; GCN-NOT: [[SVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %or = or i32 %size, %x
  store i32 %or, i32 addrspace(1)* %out
  ret void
}

; x ^ 0 folds to x.
; GCN-LABEL: {{^}}fold_mi_v_xor_0:
; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %xor = xor i32 %size, %x
  store i32 %xor, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fold_mi_s_xor_0:
; GCN: s_load_dword [[SVAL:s[0-9]+]]
; GCN-NOT: [[SVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @fold_mi_s_xor_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %xor = xor i32 %size, %x
  store i32 %xor, i32 addrspace(1)* %out
  ret void
}

; 0 ^ -1 folds to the constant -1.
; GCN-LABEL: {{^}}fold_mi_s_not_0:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], -1{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %xor = xor i32 %size, -1
  store i32 %xor, i32 addrspace(1)* %out
  ret void
}

; not of a 64-bit ctpop: the high half of ~ctpop is always -1, so only
; the low half needs a real v_not.
; GCN-LABEL: {{^}}fold_mi_v_not_0:
; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
  %vreg = load volatile i64, i64 addrspace(1)* undef
  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg)
  %xor = xor i64 %ctpop, -1
  store i64 %xor, i64 addrspace(1)* %out
  ret void
}

; The neg1 appears after folding the not 0
; GCN-LABEL: {{^}}fold_mi_or_neg1:
; GCN: buffer_load_dwordx2
; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}

; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]], v[[VREG1_LO]]
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
  %vreg0 = load volatile i64, i64 addrspace(1)* undef
  %vreg1 = load volatile i64, i64 addrspace(1)* undef
  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
  %xor = xor i64 %ctpop, -1
  %or = or i64 %xor, %vreg1
  store i64 %or, i64 addrspace(1)* %out
  ret void
}

; -1 & x: the high half folds to vreg1's high half; only one v_and
; (low half) should remain.
; GCN-LABEL: {{^}}fold_mi_and_neg1:
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_not_b32
; GCN: v_and_b32
; GCN-NOT: v_and_b32
define amdgpu_kernel void @fold_mi_and_neg1(i64 addrspace(1)* %out) {
  %vreg0 = load volatile i64, i64 addrspace(1)* undef
  %vreg1 = load volatile i64, i64 addrspace(1)* undef
  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
  %xor = xor i64 %ctpop, -1
  %and = and i64 %xor, %vreg1
  store i64 %and, i64 addrspace(1)* %out
  ret void
}

declare i64 @llvm.ctpop.i64(i64) #1
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare i32 @llvm.amdgcn.groupstaticsize() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }