Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 
      3 ; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half.
      4 
      5 ; Bit 31 is the top bit of the low dword; the checks expect a 32-bit load and shift.
      6 ; GCN-LABEL: {{^}}v_uextract_bit_31_i64:
      7 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
      8 ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
      9 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
     10 ; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
     11 define void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
     12   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
     13   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
     14   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
     15   %wide = load i64, i64 addrspace(1)* %src
     16   %shifted = lshr i64 %wide, 31 ; move bit 31 down to bit 0
     17   %field = and i64 %shifted, 1 ; keep only that one bit
     18   store i64 %field, i64 addrspace(1)* %dst
     19   ret void
     20 }
     21 
     22 ; Bit 63 is the top bit of the high dword; the checks expect a load at offset:4 and a 32-bit shift.
     23 ; GCN-LABEL: {{^}}v_uextract_bit_63_i64:
     24 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
     25 ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
     26 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
     27 ; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
     28 define void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
     29   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
     30   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
     31   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
     32   %wide = load i64, i64 addrspace(1)* %src
     33   %shifted = lshr i64 %wide, 63 ; move bit 63 down to bit 0
     34   %field = and i64 %shifted, 1 ; keep only that one bit
     35   store i64 %field, i64 addrspace(1)* %dst
     36   ret void
     37 }
     38 
     39 ; GCN-LABEL: {{^}}v_uextract_bit_1_i64:
     40 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     41 ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
     42 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
     43 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
     44 define void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
     45   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
     46   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
     47   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
     48   %wide = load i64, i64 addrspace(1)* %src
     49   %shifted = lshr i64 %wide, 1 ; bit 1 of the low dword moves to bit 0
     50   %field = and i64 %shifted, 1 ; 1-bit field; the checks expect a 32-bit bitfield extract (offset 1, width 1)
     51   store i64 %field, i64 addrspace(1)* %dst
     52   ret void
     53 }
     54 
     55 ; GCN-LABEL: {{^}}v_uextract_bit_20_i64:
     56 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     57 ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
     58 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
     59 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
     60 define void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
     61   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
     62   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
     63   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
     64   %wide = load i64, i64 addrspace(1)* %src
     65   %shifted = lshr i64 %wide, 20 ; bit 20 of the low dword moves to bit 0
     66   %field = and i64 %shifted, 1 ; 1-bit field; the checks expect a 32-bit bitfield extract (offset 20, width 1)
     67   store i64 %field, i64 addrspace(1)* %dst
     68   ret void
     69 }
     70 
     71 ; GCN-LABEL: {{^}}v_uextract_bit_32_i64:
     72 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
     73 ; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]]
     74 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
     75 ; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
     76 define void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
     77   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
     78   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
     79   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
     80   %wide = load i64, i64 addrspace(1)* %src
     81   %shifted = lshr i64 %wide, 32 ; bit 32 is bit 0 of the high dword
     82   %field = and i64 %shifted, 1 ; the checks expect only a 32-bit AND on the high dword (no shift)
     83   store i64 %field, i64 addrspace(1)* %dst
     84   ret void
     85 }
     86 
     87 ; GCN-LABEL: {{^}}v_uextract_bit_33_i64:
     88 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
     89 ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
     90 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
     91 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
     92 define void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
     93   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
     94   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
     95   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
     96   %ld.64 = load i64, i64 addrspace(1)* %in.gep
     97   %srl = lshr i64 %ld.64, 33 ; bit 33 is bit 1 of the high dword
     98   %bit = and i64 %srl, 1 ; 1-bit field; the stored low register must be the bitfield-extract result captured above
     99   store i64 %bit, i64 addrspace(1)* %out.gep
    100   ret void
    101 }
    102 
    103 ; GCN-LABEL: {{^}}v_uextract_bit_20_21_i64:
    104 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    105 ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
    106 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    107 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
    108 define void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    109   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    110   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    111   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
    112   %wide = load i64, i64 addrspace(1)* %src
    113   %shifted = lshr i64 %wide, 20 ; field starts at bit 20 of the low dword
    114   %field = and i64 %shifted, 3 ; 2-bit mask: bits 20..21
    115   store i64 %field, i64 addrspace(1)* %dst
    116   ret void
    117 }
    118 
    119 ; GCN-LABEL: {{^}}v_uextract_bit_1_30_i64:
    120 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    121 ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
    122 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    123 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
    124 define void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    125   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    126   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    127   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
    128   %wide = load i64, i64 addrspace(1)* %src
    129   %shifted = lshr i64 %wide, 1 ; field starts at bit 1 of the low dword
    130   %field = and i64 %shifted, 1073741823 ; 0x3fffffff, a 30-bit mask: bits 1..30
    131   store i64 %field, i64 addrspace(1)* %dst
    132   ret void
    133 }
    134 
    135 ; GCN-LABEL: {{^}}v_uextract_bit_1_31_i64:
    136 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    137 ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
    138 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    139 ; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
    140 define void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    141   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    142   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    143   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
    144   %wide = load i64, i64 addrspace(1)* %src
    145   %shifted = lshr i64 %wide, 1 ; field starts at bit 1 of the low dword
    146   %field = and i64 %shifted, 2147483647 ; 0x7fffffff, a 31-bit mask: bits 1..31; the checks expect a plain 32-bit shift
    147   store i64 %field, i64 addrspace(1)* %dst
    148   ret void
    149 }
    150 
    151 ; Bits 31..32 straddle the two dwords, so the checks expect a full 64-bit shift.
    152 ; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64:
    153 ; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
    154 ; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
    155 ; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
    156 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    157 ; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
    158 define void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    159   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    160   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    161   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
    162   %wide = load i64, i64 addrspace(1)* %src
    163   %shifted = lshr i64 %wide, 31 ; field starts at the top bit of the low dword
    164   %field = and i64 %shifted, 3 ; 2-bit mask: bits 31..32
    165   store i64 %field, i64 addrspace(1)* %dst
    166   ret void
    167 }
    168 
    169 ; GCN-LABEL: {{^}}v_uextract_bit_32_33_i64:
    170 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
    171 ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
    172 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    173 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
    174 define void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    175   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    176   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    177   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
    178   %wide = load i64, i64 addrspace(1)* %src
    179   %shifted = lshr i64 %wide, 33 ; NOTE(review): shift is 33 (bits 33..34) although the name says 32_33 - confirm intent
    180   %field = and i64 %shifted, 3 ; 2-bit mask; the checks expect offset 1, width 2 within the high dword
    181   store i64 %field, i64 addrspace(1)* %dst
    182   ret void
    183 }
    184 
    185 ; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64:
    186 ; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
    187 ; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 30
    188 ; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
    189 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    190 ; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
    191 define void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    192   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    193   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    194   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
    195   %wide = load i64, i64 addrspace(1)* %src
    196   %shifted = lshr i64 %wide, 30 ; field starts at bit 30 and crosses into the high dword
    197   %field = and i64 %shifted, 1073741823 ; 0x3fffffff, a 30-bit mask; the checks expect a 64-bit shift plus a 32-bit AND
    198   store i64 %field, i64 addrspace(1)* %dst
    199   ret void
    200 }
    201 
    202 ; GCN-LABEL: {{^}}v_uextract_bit_33_63_i64:
    203 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
    204 ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
    205 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    206 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
    207 define void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    208   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
    209   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
    210   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
    211   %ld.64 = load i64, i64 addrspace(1)* %in.gep
    212   %srl = lshr i64 %ld.64, 33 ; field starts at bit 1 of the high dword
    213   %bit = and i64 %srl, 1073741823 ; 0x3fffffff, a 30-bit mask; stored pair must be the extract result and the zero captured above
    214   store i64 %bit, i64 addrspace(1)* %out.gep
    215   ret void
    216 }
    217 
    218 ; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64:
    219 ; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
    220 ; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
    221 ; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
    222 ; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
    223 define void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    224   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    225   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    226   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid ; NOTE(review): computed but never used - confirm intent
    227   %wide = load i64, i64 addrspace(1)* %src
    228   %shifted = lshr i64 %wide, 31 ; field starts at the top bit of the low dword
    229   %low32 = and i64 %shifted, 4294967295 ; 0xffffffff: keep the low 32 bits of the shifted value
    230   store i64 %low32, i64 addrspace(1)* %out ; stores through %out itself, not the per-call pointer
    231   ret void
    232 }
    233 
    234 ; The i64 shift result is truncated to i32 before the mask is applied.
    235 ; GCN-LABEL: {{^}}v_uextract_bit_31_i64_trunc_i32:
    236 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    237 ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
    238 ; GCN: buffer_store_dword v[[SHIFT]]
    239 define void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    240   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    241   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    242   %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
    243   %wide = load i64, i64 addrspace(1)* %src
    244   %shifted = lshr i64 %wide, 31 ; move bit 31 down to bit 0
    245   %narrow = trunc i64 %shifted to i32 ; drop the upper 32 bits before masking
    246   %field = and i32 %narrow, 1 ; keep only bit 0
    247   store i32 %field, i32 addrspace(1)* %dst
    248   ret void
    249 }
    250 
    251 ; GCN-LABEL: {{^}}v_uextract_bit_3_i64_trunc_i32:
    252 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    253 ; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
    254 ; GCN: buffer_store_dword [[BFE]]
    255 define void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    256   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    257   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    258   %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
    259   %wide = load i64, i64 addrspace(1)* %src
    260   %shifted = lshr i64 %wide, 3 ; move bit 3 down to bit 0
    261   %narrow = trunc i64 %shifted to i32 ; drop the upper 32 bits before masking
    262   %field = and i32 %narrow, 1 ; keep only bit 0
    263   store i32 %field, i32 addrspace(1)* %dst
    264   ret void
    265 }
    266 
    267 ; GCN-LABEL: {{^}}v_uextract_bit_33_i64_trunc_i32:
    268 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
    269 ; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
    270 ; GCN: buffer_store_dword [[BFE]]
    271 define void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    272   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    273   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    274   %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
    275   %wide = load i64, i64 addrspace(1)* %src
    276   %shifted = lshr i64 %wide, 33 ; bit 33 is bit 1 of the high dword
    277   %narrow = trunc i64 %shifted to i32 ; drop the upper 32 bits before masking
    278   %field = and i32 %narrow, 1 ; keep only bit 0
    279   store i32 %field, i32 addrspace(1)* %dst
    280   ret void
    281 }
    282 
    283 ; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64_trunc_i32:
    284 ; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
    285 ; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
    286 ; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
    287 ; GCN-NOT: v[[SHRLO]]
    288 ; GCN: buffer_store_dword v[[SHRLO]]
    289 define void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    290   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    291   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    292   %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
    293   %wide = load i64, i64 addrspace(1)* %src
    294   %shifted = lshr i64 %wide, 31 ; field starts at bit 31 and crosses into the high dword
    295   %narrow = trunc i64 %shifted to i32 ; drop the upper 32 bits before masking
    296   %field = and i32 %narrow, 3 ; 2-bit mask: original bits 31..32
    297   store i32 %field, i32 addrspace(1)* %dst
    298   ret void
    299 }
    300 
    301 ; GCN-LABEL: {{^}}and_not_mask_i64:
    302 ; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
    303 ; GCN: v_mov_b32_e32 v[[SHRHI:[0-9]+]], 0{{$}}
    304 ; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 20, v[[VALLO]]
    305 ; GCN-DAG: v_and_b32_e32 v[[SHRLO:[0-9]+]], 4, [[SHR]]
    306 ; GCN-NOT: v[[SHRLO]]
    307 ; GCN-NOT: v[[SHRHI]]
    308 ; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
    309 define void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    310   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
    311   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
    312   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
    313   %ld.64 = load i64, i64 addrspace(1)* %in.gep
    314   %srl = lshr i64 %ld.64, 20 ; shift the low dword's bit 20 down to bit 0
    315   %bit = and i64 %srl, 4 ; 4 is not a contiguous low-bit mask, so the checks expect shift + AND, not a bitfield extract
    316   store i64 %bit, i64 addrspace(1)* %out.gep
    317   ret void
    318 }
    319 
    320 ; The instruction count is the same with/without hasOneUse, but
    321 ; keeping the 32-bit and has a smaller encoding size than the bfe.
    322 
    323 ; GCN-LABEL: {{^}}v_uextract_bit_27_29_multi_use_shift_i64:
    324 ; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
    325 ; GCN-DAG: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 27
    326 ; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]
    327 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    328 ; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
    329 ; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
    330 define void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    331   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    332   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    333   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
    334   %wide = load i64, i64 addrspace(1)* %src
    335   %shifted = lshr i64 %wide, 27 ; this shift has two users: the mask and the first store
    336   %field = and i64 %shifted, 3 ; 2-bit mask applied to the shifted value
    337   store volatile i64 %shifted, i64 addrspace(1)* %out ; volatile store of the full shift result
    338   store volatile i64 %field, i64 addrspace(1)* %out ; volatile store of the masked field
    339   ret void
    340 }
    341 
    342 ; GCN-LABEL: {{^}}v_uextract_bit_34_37_multi_use_shift_i64:
    343 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
    344 ; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]]
    345 ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
    346 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    347 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHR]]:[[ZERO]]{{\]}}
    348 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
    349 define void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
    350   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    351   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    352   %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
    353   %wide = load i64, i64 addrspace(1)* %src
    354   %shifted = lshr i64 %wide, 34 ; bit 34 is bit 2 of the high dword; this shift has two users
    355   %field = and i64 %shifted, 7 ; 3-bit mask applied to the shifted value
    356   store volatile i64 %shifted, i64 addrspace(1)* %out ; volatile store of the full shift result
    357   store volatile i64 %field, i64 addrspace(1)* %out ; volatile store of the masked field
    358   ret void
    359 }
    360 
    361 ; GCN-LABEL: {{^}}v_uextract_bit_33_36_use_upper_half_shift_i64:
    362 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
    363 ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3
    364 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
    365 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
    366 ; GCN: buffer_store_dword v[[ZERO]]
    367 define void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
    368   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    369   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    370   %dst.field = getelementptr i64, i64 addrspace(1)* %out0, i32 %tid
    371   %dst.upper = getelementptr i32, i32 addrspace(1)* %out1, i32 %tid
    372   %wide = load i64, i64 addrspace(1)* %src
    373   %shifted = lshr i64 %wide, 33 ; bit 33 is bit 1 of the high dword
    374   %field = and i64 %shifted, 7 ; 3-bit mask applied to the shifted value
    375   store volatile i64 %field, i64 addrspace(1)* %dst.field
    376 
    377   %upper.wide = lshr i64 %shifted, 32 ; %shifted has at most 31 significant bits, so this is always zero
    378   %upper = trunc i64 %upper.wide to i32 ; second user of the shift: its upper half
    379   store volatile i32 %upper, i32 addrspace(1)* %dst.upper ; the checks expect the zero register to be stored here
    380   ret void
    381 }
    382 
    383 declare i32 @llvm.amdgcn.workitem.id.x() #0 ; intrinsic called by each test function; its i32 result is used as the GEP index
    384 
    385 attributes #0 = { nounwind readnone } ; attribute group attached to the intrinsic declaration
    386 attributes #1 = { nounwind } ; attribute group attached to every test function definition
    387