; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}bfe_u32_arg_arg_arg:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_arg_imm:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_imm_arg:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_imm_arg_arg:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zextload_i8:
; GCN: buffer_load_ubyte
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: Should be using s_add_i32
; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
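
; In the zext_in_reg tests with a bit 0 offset, the preceding 'and' already
; clears every bit outside the requested field, so the bfe demands nothing new
; and is expected to fold away, e.g. ((x & 0xff) >> 0) & 0xff == x & 0xff.
; The nonzero-offset variants further down still need a real extract.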

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; GCN-NEXT: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; GCN-NEXT: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_1:
; GCN: buffer_load_dword
; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_2:
define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_3:
define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
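
; In test_4 below, (x << 31) >> 31 with a logical shift leaves only bit 0
; live, so extracting bit 31 with ubfe(%shr, 31, 1) is known zero and the
; whole expression should fold to the constant 0 checked below.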

; GCN-LABEL: {{^}}bfe_u32_test_4:
; GCN-NOT: lshl
; GCN-NOT: shr
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_5:
; GCN: buffer_load_dword
; GCN-NOT: lshl
; GCN-NOT: shr
; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_6:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_7:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
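
; bfe_u32_test_9 through bfe_u32_test_12 cover offset + width == 32, where the
; extract presumably reduces to a single logical shift, ubfe(x, o, 32 - o) ==
; x >> o; the v_lshrrev_b32 checks pin the shift amount to the offset.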

; GCN-LABEL: {{^}}bfe_u32_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_14:
; GCN-NOT: lshr
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
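
; For the remaining constant folds the checked value generally matches
; result = (src >> offset) & ((1 << width) - 1), with width == 0 giving 0;
; e.g. ubfe(128, 7, 1) == 1 and ubfe(160, 4, 4) == 10. constant_fold_test_4
; below is the exception: the check expects -1 rather than 1, which suggests
; the offset-0 combine treats an input that already has enough sign bits as a
; no-op instead of masking it.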

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
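
; Once offset + width reaches 32 the field covers bit 31 and the fold is just
; src >> offset: constant_fold_test_14 and _15 below check 160 >> 2 == 40 and
; 160 >> 4 == 10 respectively.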

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
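
; If %and had only the bfe use, SimplifyDemandedBits could legally shrink the
; mask to the two demanded bits (and i32 %src, 12); the second store of %and
; is what forces the full 63 mask that the v_and_b32_e32 check pins down.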

; GCN-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; GCN: buffer_load_dword [[ARG:v[0-9]+]]
; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; GCN-DAG: buffer_store_dword [[AND]]
; GCN-DAG: buffer_store_dword [[BFE]]
; GCN: s_endpgm
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                                          i32 addrspace(1)* %out1,
                                                          i32 addrspace(1)* %in) #0 {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

; GCN-LABEL: {{^}}lshr_and:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}v_lshr_and:
; GCN: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
; GCN: buffer_store_dword
define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}and_lshr:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}and_lshr2:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}shl_lshr:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
; GCN: buffer_store_dword
define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
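
; The s_bfe_u32 source-1 immediates in the checks above pack the field
; descriptor as offset in the low bits and width in bits [22:16], so 0x30006
; is a 3-bit field at offset 6 and 0x150002 is a 21-bit field at offset 2.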