; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}bfe_i32_arg_arg_arg:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_arg_imm:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_imm_arg:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_imm_arg_arg:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}v_bfe_print_arg:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 {
  %load = load i32, i32 addrspace(1)* %src0, align 4
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_6:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_7:
; GCN-NOT: shl
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
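
; The expected folds in this file assume the usual AMDGPU bitfield-extract
; semantics for this intrinsic: a width of 0 yields 0, and otherwise the
; selected field is sign-extended from its top bit. A minimal reference
; sketch in C (sbfe_ref is a hypothetical helper, not part of this test;
; offset and width are assumed in-range with offset + width <= 32):
;
;   int32_t sbfe_ref(int32_t x, unsigned offset, unsigned width) {
;     if (width == 0)
;       return 0; // zero-width extracts fold to 0
;     // Shift the field to the top of the register, then arithmetic-shift
;     // back down so its sign bit fills the high bits.
;     return (int32_t)((uint32_t)x << (32 - offset - width)) >> (32 - width);
;   }
;
; Under that model, bfe_i32_test_7 extracts bits [0, 31) of (x << 31); every
; selected bit is 0, so the whole computation folds to storing a constant 0.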

; GCN-LABEL: {{^}}bfe_i32_test_8:
; GCN: buffer_load_dword
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_13:
; GCN: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_14:
; GCN-NOT: lshr
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
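
; The constant_fold tests below feed the intrinsic nothing but immediates, so
; the result is expected to be computed at compile time, leaving only a v_mov
; of the folded value and no bfe instruction. Worked example for
; bfe_i32_constant_fold_test_6 under the semantics sketched above:
; sbfe(128, 0, 8) selects the low byte 0x80, whose top bit (bit 7) is set,
; so sign extension gives 0xffffff80, i.e. -128.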

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_0:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_1:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_2:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_3:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_4:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_5:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_6:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_7:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
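
; Worked example for bfe_i32_constant_fold_test_8 above: 127 is 0b0111_1111,
; so the 8-bit field starting at bit 6 is 0b0000_0001; its sign bit is clear
; and the folded result is 1. Likewise test_5 extracts the single set bit 7
; of 128, and a 1-bit field of 1 sign-extends to -1.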

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_13:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_14:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_15:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_16:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
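
; Worked examples for the cases above: in test_14 and test_15, offset + width
; is exactly 32, so the extract degenerates to an arithmetic shift right by
; the offset (160 >> 2 == 40, 160 >> 4 == 10). In test_11 the 4-bit field of
; 160 starting at bit 4 is 0b1010, whose sign bit is set, so it folds to -6;
; in test_16 all seven selected bits of 0xffffffff are set, giving -1.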

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_17:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_18:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_sext_in_reg_i24:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]],
; GCN-NOT: v_lshl
; GCN-NOT: v_ashr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
; GCN: buffer_store_dword [[BFE]],
define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24)
  %shl = shl i32 %bfe, 8
  %ashr = ashr i32 %shl, 8
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}simplify_demanded_bfe_sdiv:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; GCN: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
; GCN: v_add_{{[iu]}}32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
; GCN: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; GCN: buffer_store_dword [[TMP2]]
define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
  %div = sdiv i32 %bfe, 2
  store i32 %div, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_0_width:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_8_bfe_8:
; GCN: v_bfe_i32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_8_bfe_16:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN: s_endpgm
define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}
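
; simplify_demanded_bfe_sdiv above checks the standard expansion of a signed
; division by 2: add the sign bit (a logical shift right by 31) to the
; dividend, then arithmetic-shift right by 1, which rounds toward zero.
; bfe_8_bfe_8 and bfe_8_bfe_16 stack two extracts: once the inner bfe has
; sign-extended an 8-bit field across the register, a second 8- or 16-bit
; extract at offset 0 returns the value unchanged (bits 8-15 already equal
; bit 7), so a single v_bfe_i32 suffices.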

; This really should be folded into 1
; GCN-LABEL: {{^}}bfe_16_bfe_8:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE
; GCN-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; GCN: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sextload_i8_to_i32_bfe:
; GCN: buffer_load_sbyte
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN: .text
; GCN-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; GCN-NOT: shr
; GCN-NOT: shl
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; GCN: buffer_load_dword
; GCN-NOT: shl
; GCN-NOT: shr
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
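
; In the two offset tests above, the shl/ashr pair is a sign_extend_inreg in
; disguise, and it is expected to combine with the following extract into a
; single v_bfe_i32; that is why the shift instructions are checked absent.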

; GCN-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; GCN: buffer_load_dword
; GCN-NOT: v_lshl
; GCN-NOT: v_ashr
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }