Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
        ; Intrinsic exercised by every function in this file. Judging by the test
        ; names below (e.g. *_0_width_imm_offset), the operands appear to be
        ; (value, bit offset, bit width) -- TODO confirm against the intrinsic
        ; definition.
      6 
      7 ; FUNC-LABEL: {{^}}bfe_i32_arg_arg_arg:
      8 ; SI: v_bfe_i32
      9 ; EG: BFE_INT
     10 ; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
        ; No operand of the call is a literal, so a real extract instruction is
        ; expected on both targets.
        ; NOTE(review): the third operand reuses %src1 and %src2 is never read.
        ; The encoding pattern above may depend on that operand choice, so confirm
        ; before switching it to %src2.
     11 define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
     12   %extract = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
     13   store i32 %extract, i32 addrspace(1)* %out, align 4
     14   ret void
     15 }
     16 
     17 ; FUNC-LABEL: {{^}}bfe_i32_arg_arg_imm:
     18 ; SI: v_bfe_i32
     19 ; EG: BFE_INT
        ; First two operands are function arguments; the third is the literal 123.
        ; An extract instruction is still expected on both targets.
     20 define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
     21   %extract = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
     22   store i32 %extract, i32 addrspace(1)* %out, align 4
     23   ret void
     24 }
     25 
     26 ; FUNC-LABEL: {{^}}bfe_i32_arg_imm_arg:
     27 ; SI: v_bfe_i32
     28 ; EG: BFE_INT
        ; Second operand is the literal 123; the first and third come from function
        ; arguments. An extract instruction is expected on both targets.
     29 define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
     30   %extract = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
     31   store i32 %extract, i32 addrspace(1)* %out, align 4
     32   ret void
     33 }
     34 
     35 ; FUNC-LABEL: {{^}}bfe_i32_imm_arg_arg:
     36 ; SI: v_bfe_i32
     37 ; EG: BFE_INT
        ; First operand is the literal 123; the remaining two come from function
        ; arguments. An extract instruction is expected on both targets.
     38 define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
     39   %extract = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
     40   store i32 %extract, i32 addrspace(1)* %out, align 4
     41   ret void
     42 }
     43 
     44 ; FUNC-LABEL: {{^}}v_bfe_print_arg:
     45 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
        ; The extracted value is loaded from memory and the other two operands are
        ; the literals 2 and 8; the check pins how those literals are printed in
        ; the emitted instruction.
     46 define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
     47   %word = load i32, i32 addrspace(1)* %src0, align 4
     48   %extract = call i32 @llvm.AMDGPU.bfe.i32(i32 %word, i32 2, i32 8) nounwind readnone
     49   store i32 %extract, i32 addrspace(1)* %out, align 4
     50   ret void
     51 }
     52 
     53 ; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
     54 ; SI-NOT: {{[^@]}}bfe
     55 ; SI: s_endpgm
     56 ; EG-NOT: BFE
        ; Third operand is the literal 0 while the second comes from an argument.
        ; The checks require that no extract instruction is emitted on either
        ; target.
     57 define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
     58   %extract = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
     59   store i32 %extract, i32 addrspace(1)* %out, align 4
     60   ret void
     61 }
     62 
     63 ; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
     64 ; SI-NOT: {{[^@]}}bfe
     65 ; SI: s_endpgm
     66 ; EG-NOT: BFE
        ; Same as the reg_offset variant, but the second operand is the literal 8.
        ; %src1 is declared and never read. No extract instruction may be emitted.
     67 define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
     68   %extract = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
     69   store i32 %extract, i32 addrspace(1)* %out, align 4
     70   ret void
     71 }
     72 
     73 ; FUNC-LABEL: {{^}}bfe_i32_test_6:
     74 ; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
     75 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
     76 ; SI: s_endpgm
        ; Loaded word is shifted left by 31 and then passed to bfe with operands
        ; (1, 31). The checks expect a left shift by 31 followed by an arithmetic
        ; right shift by 1.
     77 define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
     78   %word = load i32, i32 addrspace(1)* %in, align 4
     79   %shifted = shl i32 %word, 31
     80   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %shifted, i32 1, i32 31)
     81   store i32 %field, i32 addrspace(1)* %out, align 4
     82   ret void
     83 }
     84 
     85 ; FUNC-LABEL: {{^}}bfe_i32_test_7:
     86 ; SI-NOT: shl
     87 ; SI-NOT: {{[^@]}}bfe
     88 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
     89 ; SI: buffer_store_dword [[VREG]],
     90 ; SI: s_endpgm
        ; Loaded word is shifted left by 31 and then passed to bfe with operands
        ; (0, 31). The checks expect neither a shift nor an extract, only a
        ; constant 0 being stored.
     91 define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
     92   %word = load i32, i32 addrspace(1)* %in, align 4
     93   %shifted = shl i32 %word, 31
     94   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %shifted, i32 0, i32 31)
     95   store i32 %field, i32 addrspace(1)* %out, align 4
     96   ret void
     97 }
     98 
     99 ; FUNC-LABEL: {{^}}bfe_i32_test_8:
    100 ; SI: buffer_load_dword
    101 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
    102 ; SI: s_endpgm
        ; Loaded word is shifted left by 31 and then passed to bfe with operands
        ; (31, 1). The checks expect a single extract printed with operands 0, 1
        ; after the load.
    103 define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    104   %word = load i32, i32 addrspace(1)* %in, align 4
    105   %shifted = shl i32 %word, 31
    106   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %shifted, i32 31, i32 1)
    107   store i32 %field, i32 addrspace(1)* %out, align 4
    108   ret void
    109 }
    110 
    111 ; FUNC-LABEL: {{^}}bfe_i32_test_9:
    112 ; SI-NOT: {{[^@]}}bfe
    113 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
    114 ; SI-NOT: {{[^@]}}bfe
    115 ; SI: s_endpgm
        ; bfe of a loaded word with operands (31, 1). The checks expect an
        ; arithmetic right shift by 31 and no extract instruction.
    116 define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    117   %word = load i32, i32 addrspace(1)* %in, align 4
    118   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %word, i32 31, i32 1)
    119   store i32 %field, i32 addrspace(1)* %out, align 4
    120   ret void
    121 }
    122 
    123 ; FUNC-LABEL: {{^}}bfe_i32_test_10:
    124 ; SI-NOT: {{[^@]}}bfe
    125 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
    126 ; SI-NOT: {{[^@]}}bfe
    127 ; SI: s_endpgm
        ; bfe of a loaded word with operands (1, 31). The checks expect an
        ; arithmetic right shift by 1 and no extract instruction.
    128 define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    129   %word = load i32, i32 addrspace(1)* %in, align 4
    130   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %word, i32 1, i32 31)
    131   store i32 %field, i32 addrspace(1)* %out, align 4
    132   ret void
    133 }
    134 
    135 ; FUNC-LABEL: {{^}}bfe_i32_test_11:
    136 ; SI-NOT: {{[^@]}}bfe
    137 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
    138 ; SI-NOT: {{[^@]}}bfe
    139 ; SI: s_endpgm
        ; bfe of a loaded word with operands (8, 24). The checks expect an
        ; arithmetic right shift by 8 and no extract instruction.
    140 define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    141   %word = load i32, i32 addrspace(1)* %in, align 4
    142   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %word, i32 8, i32 24)
    143   store i32 %field, i32 addrspace(1)* %out, align 4
    144   ret void
    145 }
    146 
    147 ; FUNC-LABEL: {{^}}bfe_i32_test_12:
    148 ; SI-NOT: {{[^@]}}bfe
    149 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
    150 ; SI-NOT: {{[^@]}}bfe
    151 ; SI: s_endpgm
        ; bfe of a loaded word with operands (24, 8). The checks expect an
        ; arithmetic right shift by 24 and no extract instruction.
    152 define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    153   %word = load i32, i32 addrspace(1)* %in, align 4
    154   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %word, i32 24, i32 8)
    155   store i32 %field, i32 addrspace(1)* %out, align 4
    156   ret void
    157 }
    158 
    159 ; FUNC-LABEL: {{^}}bfe_i32_test_13:
    160 ; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
    161 ; SI-NOT: {{[^@]}}bfe
    162 ; SI: s_endpgm
        ; Loaded word is arithmetically shifted right by 31 and then passed to bfe
        ; with operands (31, 1). The checks expect the shift by 31 to remain and
        ; no extract instruction to follow it.
    163 define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    164   %x = load i32, i32 addrspace(1)* %in, align 4
    165   %sign = ashr i32 %x, 31
    166   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sign, i32 31, i32 1)
    167   store i32 %bfe, i32 addrspace(1)* %out, align 4
          ret void
    168 }
    169 
    170 ; FUNC-LABEL: {{^}}bfe_i32_test_14:
    171 ; SI-NOT: lshr
    172 ; SI-NOT: {{[^@]}}bfe
    173 ; SI: s_endpgm
        ; Loaded word is logically shifted right by 31 and then passed to bfe with
        ; operands (31, 1). The checks expect neither the logical shift nor an
        ; extract instruction in the output.
    174 define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    175   %x = load i32, i32 addrspace(1)* %in, align 4
    176   %topbit = lshr i32 %x, 31
    177   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %topbit, i32 31, i32 1)
    178   store i32 %bfe, i32 addrspace(1)* %out, align 4
          ret void
    179 }
    180 
    181 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_0:
    182 ; SI-NOT: {{[^@]}}bfe
    183 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
    184 ; SI: buffer_store_dword [[VREG]],
    185 ; SI: s_endpgm
    186 ; EG-NOT: BFE
        ; All operands are literals (0, 0, 0). The call must fold away, leaving a
        ; store of the constant 0.
    187 define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
    188   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
    189   store i32 %folded, i32 addrspace(1)* %out, align 4
    190   ret void
    191 }
    192 
    193 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_1:
    194 ; SI-NOT: {{[^@]}}bfe
    195 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
    196 ; SI: buffer_store_dword [[VREG]],
    197 ; SI: s_endpgm
    198 ; EG-NOT: BFE
        ; All operands are literals (12334, 0, 0). The call must fold away, leaving
        ; a store of the constant 0.
    199 define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
    200   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
    201   store i32 %folded, i32 addrspace(1)* %out, align 4
    202   ret void
    203 }
    204 
    205 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_2:
    206 ; SI-NOT: {{[^@]}}bfe
    207 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
    208 ; SI: buffer_store_dword [[VREG]],
    209 ; SI: s_endpgm
    210 ; EG-NOT: BFE
        ; All operands are literals (0, 0, 1). The call must fold away, leaving a
        ; store of the constant 0.
    211 define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
    212   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
    213   store i32 %folded, i32 addrspace(1)* %out, align 4
    214   ret void
    215 }
    216 
    217 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_3:
    218 ; SI-NOT: {{[^@]}}bfe
    219 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
    220 ; SI: buffer_store_dword [[VREG]],
    221 ; SI: s_endpgm
    222 ; EG-NOT: BFE
        ; All operands are literals (1, 0, 1). The call must fold away, leaving a
        ; store of the constant -1.
    223 define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
    224   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
    225   store i32 %folded, i32 addrspace(1)* %out, align 4
    226   ret void
    227 }
    228 
    229 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_4:
    230 ; SI-NOT: {{[^@]}}bfe
    231 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
    232 ; SI: buffer_store_dword [[VREG]],
    233 ; SI: s_endpgm
    234 ; EG-NOT: BFE
        ; All operands are literals (4294967295, 0, 1). The call must fold away,
        ; leaving a store of the constant -1.
    235 define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
    236   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
    237   store i32 %folded, i32 addrspace(1)* %out, align 4
    238   ret void
    239 }
    240 
    241 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_5:
    242 ; SI-NOT: {{[^@]}}bfe
    243 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
    244 ; SI: buffer_store_dword [[VREG]],
    245 ; SI: s_endpgm
    246 ; EG-NOT: BFE
        ; All operands are literals (128, 7, 1). The call must fold away, leaving a
        ; store of the constant -1.
    247 define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
    248   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
    249   store i32 %folded, i32 addrspace(1)* %out, align 4
    250   ret void
    251 }
    252 
    253 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_6:
    254 ; SI-NOT: {{[^@]}}bfe
    255 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
    256 ; SI: buffer_store_dword [[VREG]],
    257 ; SI: s_endpgm
    258 ; EG-NOT: BFE
        ; All operands are literals (128, 0, 8). The call must fold away, leaving a
        ; store of the constant 0xffffff80.
    259 define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
    260   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
    261   store i32 %folded, i32 addrspace(1)* %out, align 4
    262   ret void
    263 }
    264 
    265 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_7:
    266 ; SI-NOT: {{[^@]}}bfe
    267 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
    268 ; SI: buffer_store_dword [[VREG]],
    269 ; SI: s_endpgm
    270 ; EG-NOT: BFE
        ; All operands are literals (127, 0, 8). The call must fold away, leaving a
        ; store of the constant 0x7f.
    271 define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
    272   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
    273   store i32 %folded, i32 addrspace(1)* %out, align 4
    274   ret void
    275 }
    276 
    277 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_8:
    278 ; SI-NOT: {{[^@]}}bfe
    279 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
    280 ; SI: buffer_store_dword [[VREG]],
    281 ; SI: s_endpgm
    282 ; EG-NOT: BFE
        ; All operands are literals (127, 6, 8). The call must fold away, leaving a
        ; store of the constant 1.
    283 define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
    284   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
    285   store i32 %folded, i32 addrspace(1)* %out, align 4
    286   ret void
    287 }
    288 
    289 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_9:
    290 ; SI-NOT: {{[^@]}}bfe
    291 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
    292 ; SI: buffer_store_dword [[VREG]],
    293 ; SI: s_endpgm
    294 ; EG-NOT: BFE
        ; All operands are literals (65536, 16, 8). The call must fold away,
        ; leaving a store of the constant 1.
    295 define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
    296   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
    297   store i32 %folded, i32 addrspace(1)* %out, align 4
    298   ret void
    299 }
    300 
    301 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_10:
    302 ; SI-NOT: {{[^@]}}bfe
    303 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
    304 ; SI: buffer_store_dword [[VREG]],
    305 ; SI: s_endpgm
    306 ; EG-NOT: BFE
        ; All operands are literals (65535, 16, 16). The call must fold away,
        ; leaving a store of the constant 0.
    307 define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
    308   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
    309   store i32 %folded, i32 addrspace(1)* %out, align 4
    310   ret void
    311 }
    312 
    313 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_11:
    314 ; SI-NOT: {{[^@]}}bfe
    315 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
    316 ; SI: buffer_store_dword [[VREG]],
    317 ; SI: s_endpgm
    318 ; EG-NOT: BFE
        ; All operands are literals (160, 4, 4). The call must fold away, leaving a
        ; store of the constant -6.
    319 define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
    320   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
    321   store i32 %folded, i32 addrspace(1)* %out, align 4
    322   ret void
    323 }
    324 
    325 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_12:
    326 ; SI-NOT: {{[^@]}}bfe
    327 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
    328 ; SI: buffer_store_dword [[VREG]],
    329 ; SI: s_endpgm
    330 ; EG-NOT: BFE
        ; All operands are literals (160, 31, 1). The call must fold away, leaving
        ; a store of the constant 0.
    331 define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
    332   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
    333   store i32 %folded, i32 addrspace(1)* %out, align 4
    334   ret void
    335 }
    336 
    337 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_13:
    338 ; SI-NOT: {{[^@]}}bfe
    339 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
    340 ; SI: buffer_store_dword [[VREG]],
    341 ; SI: s_endpgm
    342 ; EG-NOT: BFE
        ; All operands are literals (131070, 16, 16). The call must fold away,
        ; leaving a store of the constant 1.
    343 define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
    344   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
    345   store i32 %folded, i32 addrspace(1)* %out, align 4
    346   ret void
    347 }
    348 
    349 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_14:
    350 ; SI-NOT: {{[^@]}}bfe
    351 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
    352 ; SI: buffer_store_dword [[VREG]],
    353 ; SI: s_endpgm
    354 ; EG-NOT: BFE
        ; All operands are literals (160, 2, 30). The call must fold away, leaving
        ; a store of the constant 40.
    355 define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
    356   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
    357   store i32 %folded, i32 addrspace(1)* %out, align 4
    358   ret void
    359 }
    360 
    361 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_15:
    362 ; SI-NOT: {{[^@]}}bfe
    363 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
    364 ; SI: buffer_store_dword [[VREG]],
    365 ; SI: s_endpgm
    366 ; EG-NOT: BFE
        ; All operands are literals (160, 4, 28). The call must fold away, leaving
        ; a store of the constant 10.
    367 define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
    368   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
    369   store i32 %folded, i32 addrspace(1)* %out, align 4
    370   ret void
    371 }
    372 
    373 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_16:
    374 ; SI-NOT: {{[^@]}}bfe
    375 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
    376 ; SI: buffer_store_dword [[VREG]],
    377 ; SI: s_endpgm
    378 ; EG-NOT: BFE
        ; All operands are literals (4294967295, 1, 7). The call must fold away,
        ; leaving a store of the constant -1.
    379 define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
    380   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
    381   store i32 %folded, i32 addrspace(1)* %out, align 4
    382   ret void
    383 }
    384 
    385 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_17:
    386 ; SI-NOT: {{[^@]}}bfe
    387 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
    388 ; SI: buffer_store_dword [[VREG]],
    389 ; SI: s_endpgm
    390 ; EG-NOT: BFE
        ; All operands are literals (255, 1, 31). The call must fold away, leaving
        ; a store of the constant 0x7f.
    391 define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
    392   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
    393   store i32 %folded, i32 addrspace(1)* %out, align 4
    394   ret void
    395 }
    396 
    397 ; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_18:
    398 ; SI-NOT: {{[^@]}}bfe
    399 ; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
    400 ; SI: buffer_store_dword [[VREG]],
    401 ; SI: s_endpgm
    402 ; EG-NOT: BFE
        ; All operands are literals (255, 31, 1). The call must fold away, leaving
        ; a store of the constant 0.
    403 define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
    404   %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
    405   store i32 %folded, i32 addrspace(1)* %out, align 4
    406   ret void
    407 }
    408 
    409 ; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
    410 ; SI: buffer_load_dword [[LOAD:v[0-9]+]],
    411 ; SI-NOT: v_lshl
    412 ; SI-NOT: v_ashr
    413 ; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
    414 ; SI: buffer_store_dword [[BFE]],
        ; bfe with operands (0, 24) on a loaded word, followed by a shl 8 / ashr 8
        ; pair. The checks expect the shift pair to disappear so that the extract
        ; result is stored directly.
    415 define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    416   %word = load i32, i32 addrspace(1)* %in, align 4
    417   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %word, i32 0, i32 24)
    418   %up = shl i32 %field, 8
    419   %down = ashr i32 %up, 8
    420   store i32 %down, i32 addrspace(1)* %out, align 4
    421   ret void
    422 }
    423 
    424 ; FUNC-LABEL: {{^}}simplify_demanded_bfe_sdiv:
    425 ; SI: buffer_load_dword [[LOAD:v[0-9]+]]
    426 ; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
    427 ; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
    428 ; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
    429 ; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
    430 ; SI: buffer_store_dword [[TMP2]]
        ; bfe with operands (1, 16) on a loaded word, then a signed divide by 2.
        ; The checks expect the extract to stay and the divide to become the
        ; lshr 31 / add / ashr 1 sequence operating on the extract result.
    431 define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    432   %src = load i32, i32 addrspace(1)* %in, align 4
    433   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
    434   %div = sdiv i32 %bfe, 2
    435   store i32 %div, i32 addrspace(1)* %out, align 4
    436   ret void
    437 }
    438