; Population-count (llvm.ctpop) lowering test, run once with -mcpu=SI and once
; with -mcpu=cypress; each run is verified by its own FileCheck prefix.
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; ctpop intrinsic declarations for the scalar and every vector width used below.
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone

; ctpop of an i32 passed directly as a kernel argument. The SI checks expect
; the scalar S_BCNT1 form, with the result copied to a vector register only
; for the store.
; FUNC-LABEL: @s_ctpop_i32:
; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]],
; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
  %bitcount = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %bitcount, i32 addrspace(1)* %out, align 4
  ret void
}

; XXX - Why 0 in register?
; ctpop of an i32 loaded through %in. The SI checks expect the vector V_BCNT
; form with a zero materialised in a register as the second operand.
; FUNC-LABEL: @v_ctpop_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %ctpop, i32 addrspace(1)* %out, align 4
  ret void
}

; Sum of two ctpops. The SI checks expect the first count to be fed into the
; second V_BCNT as its addend, with no separate ADD instruction in between.
; FUNC-LABEL: @v_ctpop_add_chain_i32:
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
; SI-NOT: ADD
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
  %val0 = load i32 addrspace(1)* %in0, align 4
  %val1 = load i32 addrspace(1)* %in1, align 4
  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
  %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
  %add = add i32 %ctpop0, %ctpop1
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> ctpop: one bit-count instruction is expected per element.
; FUNC-LABEL: @v_ctpop_v2i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <2 x i32> addrspace(1)* %in, align 8
  %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
  store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; <4 x i32> ctpop: one bit-count instruction is expected per element.
; FUNC-LABEL: @v_ctpop_v4i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <4 x i32> addrspace(1)* %in, align 16
  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
  store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; <8 x i32> ctpop: one bit-count instruction is expected per element.
; FUNC-LABEL: @v_ctpop_v8i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <8 x i32> addrspace(1)* %in, align 32
  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
  store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; <16 x i32> ctpop: one bit-count instruction is expected per element.
; FUNC-LABEL: @v_ctpop_v16i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <16 x i32> addrspace(1)* %in, align 32
  %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
  store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
  ret void
}

; ctpop + 4: the SI checks expect the constant to appear directly as the
; second operand of the e64 V_BCNT encoding.
; FUNC-LABEL: @v_ctpop_i32_add_inline_constant:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 4
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as the test above, but with the add operands written in the opposite
; order (4 + ctpop); the expected output is identical.
; FUNC-LABEL: @v_ctpop_i32_add_inline_constant_inv:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 4, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop + 99999 (0x1869f): the SI checks expect the constant to be moved into
; a vector register first and then used as the V_BCNT addend.
; FUNC-LABEL: @v_ctpop_i32_add_literal:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 99999
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop + %const, where %const is a kernel argument: the SI checks expect the
; scalar-loaded argument to be used directly as the V_BCNT addend.
; FUNC-LABEL: @v_ctpop_i32_add_var:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, %const
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as the test above, but with the add operands written in the opposite
; order (%const + ctpop); the expected output is identical.
; FUNC-LABEL: @v_ctpop_i32_add_var_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; The addend is itself loaded from memory, at element index 4 of %constptr
; (byte offset 0x10 in the checks), so both V_BCNT operands are vector
; registers.
; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], {{.*}} + 0x0
; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} + 0x10
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %gep = getelementptr i32 addrspace(1)* %constptr, i32 4
  %const = load i32 addrspace(1)* %gep, align 4
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: We currently disallow SALU instructions in all branches,
; but there are some cases when they should be allowed.

; ctpop computed on only one side of a branch and merged through a phi; the
; SI checks expect the vector V_BCNT form with an inline 0 addend.
; FUNC-LABEL: @ctpop_i32_in_br:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
entry:
  %0 = icmp eq i32 %cond, 0
  br i1 %0, label %if, label %else

if:
  %1 = load i32 addrspace(1)* %in
  %2 = call i32 @llvm.ctpop.i32(i32 %1)
  br label %endif

else:
  %3 = getelementptr i32 addrspace(1)* %in, i32 1
  %4 = load i32 addrspace(1)* %3
  br label %endif

endif:
  %5 = phi i32 [%2, %if], [%4, %else]
  store i32 %5, i32 addrspace(1)* %out
  ret void
}