; Regression tests for the llvm.AMDGPU.bfe.u32 intrinsic (unsigned bitfield
; extract).  Covers direct codegen to v_bfe_u32/BFE_UINT, simplification of
; zero-width and known-bits cases, constant folding, and pattern-matching of
; shift/and sequences into bfe instructions.
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  ; Pass all three independent arguments; previously %src1 was passed twice
  ; and %src2 was dead, so the three-operand case was not actually tested.
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; A zero-width extract must fold away entirely.
; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; A bfe of the low 8 bits of a zero-extended byte load is a no-op.
; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
; SI: buffer_load_ubyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; SI: buffer_load_dword
; SI: v_add_i32
; SI: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; A 1-bit extract at offset 0 is just an AND with 1.
; FUNC-LABEL: {{^}}bfe_u32_test_1:
; SI: buffer_load_dword
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; (x << 31 >> 31) has only bit 0 possibly set, so extracting bit 31 is 0.
; FUNC-LABEL: {{^}}bfe_u32_test_4:
; SI-NOT: lshl
; SI-NOT: shr
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_5:
; SI: buffer_load_dword
; SI-NOT: lshl
; SI-NOT: shr
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_6:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_7:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Extracting the top bit is a plain logical shift right.
; FUNC-LABEL: {{^}}bfe_u32_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; FIXME - the line above has no check prefix, so FileCheck silently ignores
; it; it should presumably be an "SI" check for v_ashrrev_i32_e32.
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_14:
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Constant-fold checks.  bfe_u32(src, offset, width) = (src >> offset) masked
; to the low `width` bits; each test pins the folded immediate that is stored.

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; SI: buffer_load_dword [[ARG:v[0-9]+]]
; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: buffer_store_dword [[AND]]
; SI-DAG: buffer_store_dword [[BFE]]
; SI: s_endpgm
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                            i32 addrspace(1)* %out1,
                                            i32 addrspace(1)* %in) nounwind {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

; Shift/and combinations should be matched into s_bfe_u32/v_bfe_u32; the
; immediate encodes width in bits [22:16] and offset in bits [5:0].

; FUNC-LABEL: {{^}}lshr_and:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @lshr_and(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_lshr_and:
; SI: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
; SI: buffer_store_dword
define void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}and_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}and_lshr2:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr2(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}shl_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
; SI: buffer_store_dword
define void @shl_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}