; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen tests for the signed bitfield-extract intrinsic. The operands are
; used below as (source, offset, width). Both amdgcn invocations above share
; one check prefix; the r600 invocation uses its own.

declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

; All intrinsic operands come from kernel arguments; a bitfield-extract
; instruction is expected on both targets.
; NOTE(review): the width operand is %src1 rather than %src2, so %src2 is
; unused. The r600 encoding check below may depend on which argument is
; used here -- confirm before changing it.
; FUNC-LABEL: {{^}}bfe_i32_arg_arg_arg:
; SI: v_bfe_i32
; EG: BFE_INT
; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; Immediate width, source and offset from kernel arguments.
; FUNC-LABEL: {{^}}bfe_i32_arg_arg_imm:
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; Immediate offset, source and width from kernel arguments.
; FUNC-LABEL: {{^}}bfe_i32_arg_imm_arg:
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; Immediate source, offset and width from kernel arguments.
; FUNC-LABEL: {{^}}bfe_i32_imm_arg_arg:
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; Loaded source with constant offset 2 and width 8; the check pins the exact
; operand order in the printed instruction.
; FUNC-LABEL: {{^}}v_bfe_print_arg:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
  %load = load i32, i32 addrspace(1)* %src0, align 4
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; Width 0 with an offset taken from a kernel argument: no bitfield-extract
; instruction is expected on either target.
; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; Width 0 with an immediate offset: again no bitfield-extract instruction is
; expected. %src1 is declared but not used by the body.
; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; Source is (x << 31), extracted at offset 1 with width 31: the expected
; output is a left shift by 31 followed by an arithmetic right shift by 1.
; FUNC-LABEL: {{^}}bfe_i32_test_6:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Source is (x << 31), extracted at offset 0 with width 31: the low 31 bits
; of the shifted value are zero, and the checks expect a stored constant 0
; with neither a shift nor a bitfield extract.
; FUNC-LABEL: {{^}}bfe_i32_test_7:
; SI-NOT: shl
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; Source is (x << 31), extracted at offset 31 with width 1: the checks expect
; a single bitfield extract of bit 0 of the loaded value.
; FUNC-LABEL: {{^}}bfe_i32_test_8:
; SI: buffer_load_dword
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; In tests 9 through 12 offset + width equals 32, and the checks expect a
; plain arithmetic right shift by the offset instead of a bitfield extract.
; FUNC-LABEL: {{^}}bfe_i32_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Source is (x ashr 31), extracted at offset 31 with width 1: the checks
; expect only the arithmetic shift by 31 and no bitfield extract.
; FUNC-LABEL: {{^}}bfe_i32_test_13:
; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %ashr = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %ashr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Source is (x lshr 31), extracted at offset 31 with width 1: the checks
; expect neither a logical shift nor a bitfield extract in the output.
; FUNC-LABEL: {{^}}bfe_i32_test_14:
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %lshr = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %lshr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Constant-folding tests. Every operand of the intrinsic is a constant; each
; test expects the folded value to be materialised with a move and stored,
; with no bitfield-extract instruction on either target.

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_0:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_1:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_2:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; A one-bit field holding 1 sign-extends to -1.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_3:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_4:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_5:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; The 8-bit field 0x80 has its top bit set, so it sign-extends to 0xffffff80.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_6:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_7:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; 160 is 0xa0; the 4-bit field at offset 4 is 0b1010, which is -6 as a signed
; 4-bit value.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_13:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_14:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_15:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_16:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_17:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_18:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; A 24-bit signed extract followed by a shl 8 / ashr 8 pair: the checks
; expect the shift pair to disappear, leaving only the bitfield extract
; between the load and the store.
; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI-NOT: v_lshl
; SI-NOT: v_ashr
; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
; SI: buffer_store_dword [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
  %shl = shl i32 %bfe, 8
  %ashr = ashr i32 %shl, 8
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; A bitfield extract feeding a signed divide by 2: the checks expect the
; extract to keep offset 1 and width 16, followed by a logical shift right by
; 31, an add, and an arithmetic shift right by 1.
; FUNC-LABEL: {{^}}simplify_demanded_bfe_sdiv:
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
  %div = sdiv i32 %bfe, 2
  store i32 %div, i32 addrspace(1)* %out, align 4
  ret void
}