; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen tests for the llvm.ctpop intrinsic on 32-bit scalars and vectors.
; The two amdgcn runs share the GCN prefix and additionally use the SI or VI
; prefix where their expected output differs; the r600 run uses the EG prefix.
; FileCheck variables are not scoped per function in these runs (no
; --enable-var-scope), so every function should define the variables it uses.

; Intrinsic declarations for each vector width exercised below.
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone

; ctpop of an i32 passed directly as a kernel argument. The GCN checks expect
; the value to be loaded with s_load_dword, counted with s_bcnt1_i32_b32, and
; then copied into a v-register for the buffer store.
; FUNC-LABEL: {{^}}s_ctpop_i32:
; GCN: s_load_dword [[SVAL:s[0-9]+]],
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; GCN: buffer_store_dword [[VRESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %ctpop, i32 addrspace(1)* %out, align 4
  ret void
}

; XXX - Why 0 in register?
; ctpop of an i32 loaded through an addrspace(1) pointer. The GCN checks expect
; a single v_bcnt_u32_b32 whose second operand is the constant 0.
; FUNC-LABEL: {{^}}v_ctpop_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %ctpop, i32 addrspace(1)* %out, align 4
  ret void
}

; Sum of two ctpop results. The checks expect the second v_bcnt_u32_b32 to take
; the first one's result as its second operand. The SI run expects the _e32
; encoding for that instruction and the VI run expects _e64.
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
  %val0 = load i32, i32 addrspace(1)* %in0, align 4
  %val1 = load i32, i32 addrspace(1)* %in1, align 4
  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
  %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
  %add = add i32 %ctpop0, %ctpop1
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop of a loaded value plus the i32 kernel argument %sval. The checks expect
; the v_bcnt_u32_b32 to directly follow the s_waitcnt and to take an s-register
; as its second operand. There are no checks for the r600 run in this function.
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
; GCN: s_waitcnt
; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
  %val0 = load i32, i32 addrspace(1)* %in0, align 4
  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
  %add = add i32 %ctpop0, %sval
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> ctpop: the checks expect one bit-count instruction per element.
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
  %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
  store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; <4 x i32> ctpop: the checks expect one bit-count instruction per element.
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
  store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; <8 x i32> ctpop: the checks expect one bit-count instruction per element.
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
  store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; <16 x i32> ctpop: the checks expect one bit-count instruction per element.
; Note that the load and store here use align 32, the same as the v8i32 case.
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
  %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
  store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
  ret void
}

; ctpop plus the constant 4. The checks expect 4 to appear directly as the
; second operand of v_bcnt_u32_b32.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 4
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as v_ctpop_i32_add_inline_constant but with the add operands swapped in
; the IR; the expected output is identical.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 4, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop plus 99999 (0x1869f). The checks expect the constant to be moved into a
; v-register with v_mov_b32 and that register used as the second operand. The SI
; run expects the _e32 encoding of the bit count and the VI run expects _e64.
; There are no checks for the r600 run in this function.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 99999
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop plus the i32 kernel argument %const. The checks expect the value loaded
; by s_load_dword to be used directly as the second operand of v_bcnt_u32_b32.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, %const
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as v_ctpop_i32_add_var but with the add operands swapped in the IR; the
; expected output is identical.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop plus a second value that is itself loaded from memory, at element index
; 4 of %constptr (matched as "offset:16" in the second buffer_load_dword check).
; The SI run expects the _e32 encoding of the bit count and the VI run expects
; _e64.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}}
; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
  %const = load i32, i32 addrspace(1)* %gep, align 4
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: We currently disallow SALU instructions in all branches,
; but there are some cases when they should be allowed.
; ctpop of a scalar kernel argument computed in only one arm of an if/else.
; The other arm loads element 1 of %in, and a phi in %endif selects which
; value is stored to %out. The SI and VI runs differ only in the expected
; s_load_dword offset (0xd vs 0x34).
;
; RESULT is defined on the v_mov_b32 line. A bare use there would silently
; reuse whatever register an earlier function in this file last bound to that
; name, because FileCheck variables are file-global unless --enable-var-scope
; is given.
; FUNC-LABEL: {{^}}ctpop_i32_in_br:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], [[SRESULT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
entry:
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %if, label %else

if:
  ; Taken when %cond == 0: population count of the kernel argument.
  %tmp2 = call i32 @llvm.ctpop.i32(i32 %ctpop_arg)
  br label %endif

else:
  ; Taken otherwise: load the i32 at index 1 of %in instead.
  %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %tmp4 = load i32, i32 addrspace(1)* %tmp3
  br label %endif

endif:
  %tmp5 = phi i32 [%tmp2, %if], [%tmp4, %else]
  store i32 %tmp5, i32 addrspace(1)* %out
  ret void
}