; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half.
;
; Each kernel loads one i64 per work item, shifts it right, masks it and
; stores the result. The check lines pin which dword is loaded and which
; 32-bit instruction performs the extraction.

; Extract the high bit of the low half
; GCN-LABEL: {{^}}v_uextract_bit_31_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract the high bit of the high half
; GCN-LABEL: {{^}}v_uextract_bit_63_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 63
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract bit 1 of the low half: 32-bit BFE at offset 1, width 1.
; GCN-LABEL: {{^}}v_uextract_bit_1_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 1
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract bit 20 of the low half: 32-bit BFE at offset 20, width 1.
; GCN-LABEL: {{^}}v_uextract_bit_20_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 20
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Bit 32 is bit 0 of the high half: a 32-bit AND with 1 on the high dword.
; GCN-LABEL: {{^}}v_uextract_bit_32_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 32
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Bit 33 is bit 1 of the high half: 32-bit BFE at offset 1, width 1.
; GCN-LABEL: {{^}}v_uextract_bit_33_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Two-bit field at bits 20-21 of the low half.
; GCN-LABEL: {{^}}v_uextract_bit_20_21_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 20
  %bit = and i64 %srl, 3
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; 30-bit field starting at bit 1 (mask 1073741823 = 2^30 - 1); it stays
; inside the low half.
; GCN-LABEL: {{^}}v_uextract_bit_1_30_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 1
  %bit = and i64 %srl, 1073741823
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; 31-bit field starting at bit 1 (mask 2147483647 = 2^31 - 1). The field
; runs to the top of the low dword, so a plain 32-bit shift right by 1 is
; expected instead of a BFE.
; GCN-LABEL: {{^}}v_uextract_bit_1_31_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 1
  %bit = and i64 %srl, 2147483647
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Spans the dword boundary, so requires full shift
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %bit = and i64 %srl, 3
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; NOTE: despite the name, the IR below shifts by 33 and masks with 3, i.e.
; it extracts bits 33-34 (offset 1, width 2 within the high dword).
; GCN-LABEL: {{^}}v_uextract_bit_32_33_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 3
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; 30-bit field starting at bit 30 crosses the dword boundary, so the full
; 64-bit shift followed by a 32-bit AND is expected.
; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 30
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 30
  %bit = and i64 %srl, 1073741823
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; 30-bit field starting at bit 33 lies entirely in the high half: 32-bit
; BFE at offset 1, width 30 on the high dword.
; GCN-LABEL: {{^}}v_uextract_bit_33_63_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 1073741823
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; 32-bit field starting at bit 31 (mask 4294967295 = 2^32 - 1): the 64-bit
; shift is kept and the high result dword is overwritten with zero.
; NOTE: the result is stored through %out; %out.gep is computed but unused.
; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
define void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %and = and i64 %srl, 4294967295
  store i64 %and, i64 addrspace(1)* %out
  ret void
}

; trunc applied before and mask
; GCN-LABEL: {{^}}v_uextract_bit_31_i64_trunc_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword v[[SHIFT]]
define void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 1
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Truncated variant of a single-bit extract from the low half (bit 3).
; GCN-LABEL: {{^}}v_uextract_bit_3_i64_trunc_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
define void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 3
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 1
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Truncated variant of a single-bit extract from the high half (bit 33).
; GCN-LABEL: {{^}}v_uextract_bit_33_i64_trunc_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
define void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 1
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Truncated variant of the two-bit field that crosses the dword boundary:
; the 64-bit shift is still required, then the low dword is masked in place.
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64_trunc_i32:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
; GCN-NOT: v[[SHRLO]]
; GCN: buffer_store_dword v[[SHRLO]]
define void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 3
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; The AND constant 4 is not a low-bit mask (not of the form 2^n - 1), so
; this is not a bitfield extract: a 32-bit shift followed by a 32-bit AND
; is expected. SHRHI and SHRLO are captured here so the checks do not
; depend on registers matched in an earlier function.
; GCN-LABEL: {{^}}and_not_mask_i64:
; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
; GCN: v_mov_b32_e32 v[[SHRHI:[0-9]+]], 0{{$}}
; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 20, v[[VALLO]]
; GCN-DAG: v_and_b32_e32 v[[SHRLO:[0-9]+]], 4, [[SHR]]
; GCN-NOT: v[[SHRLO]]
; GCN-NOT: v[[SHRHI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
define void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 20
  %bit = and i64 %srl, 4
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; The instruction count is the same with/without hasOneUse, but
; keeping the 32-bit and has a smaller encoding size than the bfe.
; The shifted value has a second use (it is stored as well as masked).
; The checks expect the 64-bit shift to be kept and the two-bit field to be
; taken with a 32-bit AND on the low dword of the shift result.
; GCN-LABEL: {{^}}v_uextract_bit_27_29_multi_use_shift_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 27
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 27
  %bit = and i64 %srl, 3
  ; Both stores are volatile and go through %out (not %out.gep).
  store volatile i64 %srl, i64 addrspace(1)* %out
  store volatile i64 %bit, i64 addrspace(1)* %out
  ret void
}

; Shift by 34 with mask 7: both the shifted value and the three-bit field
; come from the high dword only, so the checks expect a single dword load
; at offset 4, a 32-bit shift by 2 and a 32-bit BFE (offset 2, width 3),
; with the same zero register used as the high half of both stores.
; GCN-LABEL: {{^}}v_uextract_bit_34_37_multi_use_shift_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]]
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHR]]:[[ZERO]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 34
  %bit = and i64 %srl, 7
  ; Both stores are volatile and go through %out (not %out.gep).
  store volatile i64 %srl, i64 addrspace(1)* %out
  store volatile i64 %bit, i64 addrspace(1)* %out
  ret void
}
; Shift by 33 with mask 7, where the upper 32 bits of the shifted value are
; also stored separately. After a logical shift right by 33 those upper
; bits are all zero, and the checks expect the zero register that forms the
; high half of the first store to be reused for the second (dword) store.
; GCN-LABEL: {{^}}v_uextract_bit_33_36_use_upper_half_shift_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
; GCN: buffer_store_dword v[[ZERO]]
define void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out0.gep = getelementptr i64, i64 addrspace(1)* %out0, i32 %id.x
  %out1.gep = getelementptr i32, i32 addrspace(1)* %out1, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 7
  store volatile i64 %bit, i64 addrspace(1)* %out0.gep

  ; Second use of the shift: its upper 32 bits, truncated to i32.
  %srl.srl32 = lshr i64 %srl, 32
  %srl.hi = trunc i64 %srl.srl32 to i32
  store volatile i32 %srl.hi, i32 addrspace(1)* %out1.gep
  ret void
}

; Work-item id intrinsic used by every kernel in this file to index the
; input and output buffers.
declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }