; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
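
; As exercised by the tests in this file, @llvm.AMDGPU.bfe.i32(src, offset, width)
; is a signed bitfield extract: it selects 'width' bits of 'src' starting at bit
; 'offset' and sign-extends that field to 32 bits; a width of 0 yields 0.
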
; FUNC-LABEL: @bfe_i32_arg_arg_arg
; SI: V_BFE_I32
; EG: BFE_INT
; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_arg_arg_imm
; SI: V_BFE_I32
; EG: BFE_INT
define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_arg_imm_arg
; SI: V_BFE_I32
; EG: BFE_INT
define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_imm_arg_arg
; SI: V_BFE_I32
; EG: BFE_INT
define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_bfe_print_arg
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
  %load = load i32 addrspace(1)* %src0, align 4
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset
; SI-NOT: BFE
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset
; SI-NOT: BFE
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_6
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: S_ENDPGM
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_7
; SI-NOT: SHL
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: The shifts should be 1 BFE
; FUNC-LABEL: @bfe_i32_test_8
; SI: BUFFER_LOAD_DWORD
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: S_ENDPGM
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
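
; In the tests below the extracted field reaches bit 31 (offset + width == 32),
; so a signed bfe is equivalent to an arithmetic shift right by 'offset'; the
; checks expect V_ASHRREV_I32 (or nothing at all once the shift itself also
; folds away) rather than a V_BFE_I32.
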
; FUNC-LABEL: @bfe_i32_test_9
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_10
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_11
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_12
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_13
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_14
; SI-NOT: LSHR
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
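
; The constant_fold tests below check that calls with constant operands fold to
; a plain move of the expected value. For example, bfe.i32(128, 0, 8) extracts
; the low byte 0x80 and sign-extends it to 0xffffff80 (-128), while
; bfe.i32(160, 4, 4) extracts 0b1010 and sign-extends it to -6.
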
; FUNC-LABEL: @bfe_i32_constant_fold_test_0
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_1
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_2
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_3
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_4
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_5
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_6
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_7
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_8
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_9
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_10
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_11
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_12
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_13
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_14
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_15
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_16
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_17
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_18
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
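
; A bfe with offset 0 and width w is a sign extension from bit w - 1, so the
; shl/ashr pair in the final test is redundant with the preceding
; bfe.i32(x, 0, 24) and the whole sequence could be a single V_BFE_I32; the
; note below records why that currently does not happen.
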
; XXX - This should really be a single BFE, but the sext_inreg of the
; extended type i24 is never custom lowered.
; FUNC-LABEL: @bfe_sext_in_reg_i24
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
; XSI-NOT: SHL
; XSI-NOT: SHR
; XSI: BUFFER_STORE_DWORD [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
  %shl = shl i32 %bfe, 8
  %ashr = ashr i32 %shl, 8
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}