; Tests for the llvm.AMDGPU.bfe.u32 (unsigned bitfield extract) intrinsic.
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone

      7 ; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
      8 ; SI: v_bfe_u32
      9 ; EG: BFE_UINT
     10 define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
     11   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
     12   store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
     13   ret void
     14 }
     15 
; Source and offset are arguments; width is the immediate 123.
; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Source and width are arguments; offset is the immediate 123.
; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Offset and width are arguments; source is the immediate 123.
; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; A zero-width extract should fold away entirely: no bfe in the output,
; even with a register offset operand.
; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Zero-width extract with an immediate offset also folds away.
; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Extracting bits [0,8) of a zero-extended i8 load is a no-op; the
; buffer_load_ubyte already produces the zero-extended value, so no bfe
; should be emitted.
; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
; SI: buffer_load_ubyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; The value is already masked with `and 255`, so a bfe over bits [0,8) is
; redundant; the add+and sequence alone should remain.
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as the i8 case but with a 16-bit mask (and 65535) and width 16:
; the bfe is redundant and must not appear.
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; With a non-zero offset (1) the extract is no longer covered by the `and`,
; so a bfe instruction is still required.
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; SI: buffer_load_dword
; SI: v_add_i32
; SI: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Offset 3: SimplifyDemandedBits narrows the mask to the demanded bits
; (0xf8), and the bfe remains.
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Offset 7: only bit 7 of the masked byte is demanded, so the mask becomes
; 0x80 and the bfe remains.
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; 16-bit masked value, extract bits [8,16): the and folds into the bfe's
; demanded bits, leaving add followed directly by bfe.
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x, 0, 1) extracts the low bit, which should lower to `and x, 1`.
; FUNC-LABEL: {{^}}bfe_u32_test_1:
; SI: buffer_load_dword
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x << 31, 0, 8): bits [0,8) of a value shifted left by 31 are zero.
; NOTE(review): this test (and test_3) has no FUNC-LABEL/check lines, so
; FileCheck never verifies its output — only that llc succeeds. Confirm
; whether check lines were dropped and should be restored.
define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x << 31, 0, 1): the low bit of (x << 31) is always zero.
; NOTE(review): like test_2, there are no FUNC-LABEL/check lines here, so
; FileCheck does not inspect this function's output.
define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; ((x << 31) >>u 31) leaves only bit 0; extracting bit 31 of that is
; always 0, so the whole chain folds to storing the constant 0.
; FUNC-LABEL: {{^}}bfe_u32_test_4:
; SI-NOT: lshl
; SI-NOT: shr
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; shl 31 + ashr 31 is a sign-extension of bit 0; the combined pattern is
; recognized and lowered to a single signed bfe (v_bfe_i32 0, 1).
; FUNC-LABEL: {{^}}bfe_u32_test_5:
; SI: buffer_load_dword
; SI-NOT: lshl
; SI-NOT: shr
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x << 31, 1, 31) becomes a plain unsigned shift pair: shl 31 then
; lshr 1.
; FUNC-LABEL: {{^}}bfe_u32_test_6:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x << 31, 0, 31): bits [0,31) of (x << 31) need no extract beyond the
; shift itself, so no bfe instruction is emitted.
; FUNC-LABEL: {{^}}bfe_u32_test_7:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x << 31, 31, 1) extracts bit 31, i.e. the original bit 0: lowers to
; `and x, 1` with no bfe.
; FUNC-LABEL: {{^}}bfe_u32_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x, 31, 1) is simply an unsigned shift right by 31.
; FUNC-LABEL: {{^}}bfe_u32_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x, 1, 31) — offset + width == 32 — is just `lshr x, 1`.
; FUNC-LABEL: {{^}}bfe_u32_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x, 8, 24) — offset + width == 32 — is just `lshr x, 8`.
; FUNC-LABEL: {{^}}bfe_u32_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe(x, 24, 8) — offset + width == 32 — is just `lshr x, 24`.
; FUNC-LABEL: {{^}}bfe_u32_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

    307 ; FUNC-LABEL: {{^}}bfe_u32_test_13:
    308 ; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
    309 ; SI-NOT: {{[^@]}}bfe
    310 ; SI: s_endpgm
    311 define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    312   %x = load i32, i32 addrspace(1)* %in, align 4
    313   %shl = ashr i32 %x, 31
    314   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
    315   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
    316 }
    317 
    318 ; FUNC-LABEL: {{^}}bfe_u32_test_14:
    319 ; SI-NOT: lshr
    320 ; SI-NOT: {{[^@]}}bfe
    321 ; SI: s_endpgm
    322 define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
    323   %x = load i32, i32 addrspace(1)* %in, align 4
    324   %shl = lshr i32 %x, 31
    325   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
    326   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
    327 }
    328 
; bfe.u32(0, 0, 0) constant-folds to 0; only a mov/store should remain.
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Zero width always folds to 0 regardless of the source value.
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(0, 0, 1) folds to 0 (extract from a zero source).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(1, 0, 1) folds to 1 (low bit of 1).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(0xffffffff, 0, 1).
; NOTE(review): plain unsigned bitfield math gives (0xffffffff >> 0) & 1 == 1,
; but the check below expects -1 — verify against the intrinsic's
; constant-folding implementation before relying on this value.
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(128, 7, 1) folds to 1 (bit 7 of 0x80).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(128, 0, 8) folds to 128 (0x80).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(127, 0, 8) folds to 127 (0x7f).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(127, 6, 8) folds to 1 (127 >> 6).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(65536, 16, 8) folds to 1 (0x10000 >> 16).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(65535, 16, 16) folds to 0 (bits [16,32) of 0xffff are zero).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(160, 4, 4) folds to 10 ((0xa0 >> 4) & 0xf).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(160, 31, 1) folds to 0 (bit 31 of 0xa0 is clear).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(131070, 16, 16) folds to 1 (0x1fffe >> 16).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(160, 2, 30) folds to 40 (0xa0 >> 2, width reaches bit 31).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(160, 4, 28) folds to 10 (0xa0 >> 4, width reaches bit 31).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(0xffffffff, 1, 7) folds to 0x7f (7 one-bits extracted).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(255, 1, 31) folds to 0x7f (0xff >> 1).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; bfe.u32(255, 31, 1) folds to 0 (bit 31 of 0xff is clear).
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.
; The `and` result is stored to %out1 as well, so the full 63 mask must
; survive even though the bfe only demands bits [2,4).

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; SI: buffer_load_dword [[ARG:v[0-9]+]]
; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: buffer_store_dword [[AND]]
; SI-DAG: buffer_store_dword [[BFE]]
; SI: s_endpgm
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                            i32 addrspace(1)* %out1,
                                            i32 addrspace(1)* %in) nounwind {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

; lshr by 6 then mask with 7 should match a scalar bfe: width 3, offset 6
; packed as 0x30006.
; FUNC-LABEL: {{^}}lshr_and:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @lshr_and(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; Variable shift amount + mask with 7 should still select a bfe, with the
; offset in a register and an immediate width of 3.
; FUNC-LABEL: {{^}}v_lshr_and:
; SI: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
; SI: buffer_store_dword
define void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

; Mask-then-shift order: (a & 448) >> 6 — 448 is 0b111 << 6, so this is the
; same 3-bit field at offset 6 (0x30006).
; FUNC-LABEL: {{^}}and_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; (a & 511) >> 6: the shift discards the mask's low 6 bits, leaving the same
; 3-bit field at offset 6 (0x30006).
; FUNC-LABEL: {{^}}and_lshr2:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr2(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; (a << 9) >> 11 extracts 21 bits starting at bit 2: width 0x15, offset 2,
; packed as 0x150002.
; FUNC-LABEL: {{^}}shl_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
; SI: buffer_store_dword
define void @shl_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}
