1 /* 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 /* Resource binding slots and sampler states (each described with 8 or 25 * 4 dwords) are stored in lists in memory which is accessed by shaders 26 * using scalar load instructions. 27 * 28 * This file is responsible for managing such lists. It keeps a copy of all 29 * descriptors in CPU memory and re-uploads a whole list if some slots have 30 * been changed. 31 * 32 * This code is also reponsible for updating shader pointers to those lists. 33 * 34 * Note that CP DMA can't be used for updating the lists, because a GPU hang 35 * could leave the list in a mid-IB state and the next IB would get wrong 36 * descriptors and the whole context would be unusable at that point. 37 * (Note: The register shadowing can't be used due to the same reason) 38 * 39 * Also, uploading descriptors to newly allocated memory doesn't require 40 * a KCACHE flush. 41 * 42 * 43 * Possible scenarios for one 16 dword image+sampler slot: 44 * 45 * | Image | w/ FMASK | Buffer | NULL 46 * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3] 47 * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0 48 * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3] 49 * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3] 50 * 51 * FMASK implies MSAA, therefore no sampler state. 52 * Sampler states are never unbound except when FMASK is bound. 53 */ 54 55 #include "radeon/r600_cs.h" 56 #include "si_pipe.h" 57 #include "sid.h" 58 #include "gfx9d.h" 59 60 #include "util/hash_table.h" 61 #include "util/u_idalloc.h" 62 #include "util/u_format.h" 63 #include "util/u_memory.h" 64 #include "util/u_upload_mgr.h" 65 66 67 /* NULL image and buffer descriptor for textures (alpha = 1) and images 68 * (alpha = 0). 69 * 70 * For images, all fields must be zero except for the swizzle, which 71 * supports arbitrary combinations of 0s and 1s. The texture type must be 72 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs. 73 * 74 * For buffers, all fields must be zero. If they are not, the hw hangs. 75 * 76 * This is the only reason why the buffer descriptor must be in words [4:7]. 77 */ 78 static uint32_t null_texture_descriptor[8] = { 79 0, 80 0, 81 0, 82 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) | 83 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D) 84 /* the rest must contain zeros, which is also used by the buffer 85 * descriptor */ 86 }; 87 88 static uint32_t null_image_descriptor[8] = { 89 0, 90 0, 91 0, 92 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D) 93 /* the rest must contain zeros, which is also used by the buffer 94 * descriptor */ 95 }; 96 97 static uint64_t si_desc_extract_buffer_address(uint32_t *desc) 98 { 99 return desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32); 100 } 101 102 static void si_init_descriptor_list(uint32_t *desc_list, 103 unsigned element_dw_size, 104 unsigned num_elements, 105 const uint32_t *null_descriptor) 106 { 107 int i; 108 109 /* Initialize the array to NULL descriptors if the element size is 8. */ 110 if (null_descriptor) { 111 assert(element_dw_size % 8 == 0); 112 for (i = 0; i < num_elements * element_dw_size / 8; i++) 113 memcpy(desc_list + i * 8, null_descriptor, 8 * 4); 114 } 115 } 116 117 static void si_init_descriptors(struct si_descriptors *desc, 118 unsigned shader_userdata_index, 119 unsigned element_dw_size, 120 unsigned num_elements) 121 { 122 desc->list = CALLOC(num_elements, element_dw_size * 4); 123 desc->element_dw_size = element_dw_size; 124 desc->num_elements = num_elements; 125 desc->shader_userdata_offset = shader_userdata_index * 4; 126 desc->slot_index_to_bind_directly = -1; 127 } 128 129 static void si_release_descriptors(struct si_descriptors *desc) 130 { 131 r600_resource_reference(&desc->buffer, NULL); 132 FREE(desc->list); 133 } 134 135 static bool si_upload_descriptors(struct si_context *sctx, 136 struct si_descriptors *desc) 137 { 138 unsigned slot_size = desc->element_dw_size * 4; 139 unsigned first_slot_offset = desc->first_active_slot * slot_size; 140 unsigned upload_size = desc->num_active_slots * slot_size; 141 142 /* Skip the upload if no shader is using the descriptors. dirty_mask 143 * will stay dirty and the descriptors will be uploaded when there is 144 * a shader using them. 145 */ 146 if (!upload_size) 147 return true; 148 149 /* If there is just one active descriptor, bind it directly. */ 150 if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly && 151 desc->num_active_slots == 1) { 152 uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly * 153 desc->element_dw_size]; 154 155 /* The buffer is already in the buffer list. */ 156 r600_resource_reference(&desc->buffer, NULL); 157 desc->gpu_list = NULL; 158 desc->gpu_address = si_desc_extract_buffer_address(descriptor); 159 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); 160 return true; 161 } 162 163 uint32_t *ptr; 164 unsigned buffer_offset; 165 u_upload_alloc(sctx->b.b.const_uploader, first_slot_offset, upload_size, 166 si_optimal_tcc_alignment(sctx, upload_size), 167 &buffer_offset, (struct pipe_resource**)&desc->buffer, 168 (void**)&ptr); 169 if (!desc->buffer) { 170 desc->gpu_address = 0; 171 return false; /* skip the draw call */ 172 } 173 174 util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset, 175 upload_size); 176 desc->gpu_list = ptr - first_slot_offset / 4; 177 178 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, 179 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); 180 181 /* The shader pointer should point to slot 0. */ 182 buffer_offset -= first_slot_offset; 183 desc->gpu_address = desc->buffer->gpu_address + buffer_offset; 184 185 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); 186 return true; 187 } 188 189 static void 190 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc) 191 { 192 if (!desc->buffer) 193 return; 194 195 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, 196 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); 197 } 198 199 /* SAMPLER VIEWS */ 200 201 static inline enum radeon_bo_priority 202 si_get_sampler_view_priority(struct r600_resource *res) 203 { 204 if (res->b.b.target == PIPE_BUFFER) 205 return RADEON_PRIO_SAMPLER_BUFFER; 206 207 if (res->b.b.nr_samples > 1) 208 return RADEON_PRIO_SAMPLER_TEXTURE_MSAA; 209 210 return RADEON_PRIO_SAMPLER_TEXTURE; 211 } 212 213 static unsigned 214 si_sampler_and_image_descriptors_idx(unsigned shader) 215 { 216 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + 217 SI_SHADER_DESCS_SAMPLERS_AND_IMAGES; 218 } 219 220 static struct si_descriptors * 221 si_sampler_and_image_descriptors(struct si_context *sctx, unsigned shader) 222 { 223 return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)]; 224 } 225 226 static void si_release_sampler_views(struct si_samplers *samplers) 227 { 228 int i; 229 230 for (i = 0; i < ARRAY_SIZE(samplers->views); i++) { 231 pipe_sampler_view_reference(&samplers->views[i], NULL); 232 } 233 } 234 235 static void si_sampler_view_add_buffer(struct si_context *sctx, 236 struct pipe_resource *resource, 237 enum radeon_bo_usage usage, 238 bool is_stencil_sampler, 239 bool check_mem) 240 { 241 struct r600_resource *rres; 242 struct r600_texture *rtex; 243 enum radeon_bo_priority priority; 244 245 if (!resource) 246 return; 247 248 if (resource->target != PIPE_BUFFER) { 249 struct r600_texture *tex = (struct r600_texture*)resource; 250 251 if (tex->is_depth && !si_can_sample_zs(tex, is_stencil_sampler)) 252 resource = &tex->flushed_depth_texture->resource.b.b; 253 } 254 255 rres = (struct r600_resource*)resource; 256 priority = si_get_sampler_view_priority(rres); 257 258 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 259 rres, usage, priority, 260 check_mem); 261 262 if (resource->target == PIPE_BUFFER) 263 return; 264 265 /* Now add separate DCC or HTILE. */ 266 rtex = (struct r600_texture*)resource; 267 if (rtex->dcc_separate_buffer) { 268 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 269 rtex->dcc_separate_buffer, usage, 270 RADEON_PRIO_DCC, check_mem); 271 } 272 } 273 274 static void si_sampler_views_begin_new_cs(struct si_context *sctx, 275 struct si_samplers *samplers) 276 { 277 unsigned mask = samplers->enabled_mask; 278 279 /* Add buffers to the CS. */ 280 while (mask) { 281 int i = u_bit_scan(&mask); 282 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i]; 283 284 si_sampler_view_add_buffer(sctx, sview->base.texture, 285 RADEON_USAGE_READ, 286 sview->is_stencil_sampler, false); 287 } 288 } 289 290 /* Set buffer descriptor fields that can be changed by reallocations. */ 291 static void si_set_buf_desc_address(struct r600_resource *buf, 292 uint64_t offset, uint32_t *state) 293 { 294 uint64_t va = buf->gpu_address + offset; 295 296 state[0] = va; 297 state[1] &= C_008F04_BASE_ADDRESS_HI; 298 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32); 299 } 300 301 /* Set texture descriptor fields that can be changed by reallocations. 302 * 303 * \param tex texture 304 * \param base_level_info information of the level of BASE_ADDRESS 305 * \param base_level the level of BASE_ADDRESS 306 * \param first_level pipe_sampler_view.u.tex.first_level 307 * \param block_width util_format_get_blockwidth() 308 * \param is_stencil select between separate Z & Stencil 309 * \param state descriptor to update 310 */ 311 void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, 312 struct r600_texture *tex, 313 const struct legacy_surf_level *base_level_info, 314 unsigned base_level, unsigned first_level, 315 unsigned block_width, bool is_stencil, 316 uint32_t *state) 317 { 318 uint64_t va, meta_va = 0; 319 320 if (tex->is_depth && !si_can_sample_zs(tex, is_stencil)) { 321 tex = tex->flushed_depth_texture; 322 is_stencil = false; 323 } 324 325 va = tex->resource.gpu_address; 326 327 if (sscreen->info.chip_class >= GFX9) { 328 /* Only stencil_offset needs to be added here. */ 329 if (is_stencil) 330 va += tex->surface.u.gfx9.stencil_offset; 331 else 332 va += tex->surface.u.gfx9.surf_offset; 333 } else { 334 va += base_level_info->offset; 335 } 336 337 state[0] = va >> 8; 338 state[1] &= C_008F14_BASE_ADDRESS_HI; 339 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); 340 341 /* Only macrotiled modes can set tile swizzle. 342 * GFX9 doesn't use (legacy) base_level_info. 343 */ 344 if (sscreen->info.chip_class >= GFX9 || 345 base_level_info->mode == RADEON_SURF_MODE_2D) 346 state[0] |= tex->surface.tile_swizzle; 347 348 if (sscreen->info.chip_class >= VI) { 349 state[6] &= C_008F28_COMPRESSION_EN; 350 state[7] = 0; 351 352 if (vi_dcc_enabled(tex, first_level)) { 353 meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) + 354 tex->dcc_offset; 355 356 if (sscreen->info.chip_class == VI) { 357 meta_va += base_level_info->dcc_offset; 358 assert(base_level_info->mode == RADEON_SURF_MODE_2D); 359 } 360 361 meta_va |= (uint32_t)tex->surface.tile_swizzle << 8; 362 } else if (vi_tc_compat_htile_enabled(tex, first_level)) { 363 meta_va = tex->resource.gpu_address + tex->htile_offset; 364 } 365 366 if (meta_va) { 367 state[6] |= S_008F28_COMPRESSION_EN(1); 368 state[7] = meta_va >> 8; 369 } 370 } 371 372 if (sscreen->info.chip_class >= GFX9) { 373 state[3] &= C_008F1C_SW_MODE; 374 state[4] &= C_008F20_PITCH_GFX9; 375 376 if (is_stencil) { 377 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode); 378 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.stencil.epitch); 379 } else { 380 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode); 381 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.surf.epitch); 382 } 383 384 state[5] &= C_008F24_META_DATA_ADDRESS & 385 C_008F24_META_PIPE_ALIGNED & 386 C_008F24_META_RB_ALIGNED; 387 if (meta_va) { 388 struct gfx9_surf_meta_flags meta; 389 390 if (tex->dcc_offset) 391 meta = tex->surface.u.gfx9.dcc; 392 else 393 meta = tex->surface.u.gfx9.htile; 394 395 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | 396 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) | 397 S_008F24_META_RB_ALIGNED(meta.rb_aligned); 398 } 399 } else { 400 /* SI-CI-VI */ 401 unsigned pitch = base_level_info->nblk_x * block_width; 402 unsigned index = si_tile_mode_index(tex, base_level, is_stencil); 403 404 state[3] &= C_008F1C_TILING_INDEX; 405 state[3] |= S_008F1C_TILING_INDEX(index); 406 state[4] &= C_008F20_PITCH_GFX6; 407 state[4] |= S_008F20_PITCH_GFX6(pitch - 1); 408 } 409 } 410 411 static void si_set_sampler_state_desc(struct si_sampler_state *sstate, 412 struct si_sampler_view *sview, 413 struct r600_texture *tex, 414 uint32_t *desc) 415 { 416 if (sview && sview->is_integer) 417 memcpy(desc, sstate->integer_val, 4*4); 418 else if (tex && tex->upgraded_depth && 419 (!sview || !sview->is_stencil_sampler)) 420 memcpy(desc, sstate->upgraded_depth_val, 4*4); 421 else 422 memcpy(desc, sstate->val, 4*4); 423 } 424 425 static void si_set_sampler_view_desc(struct si_context *sctx, 426 struct si_sampler_view *sview, 427 struct si_sampler_state *sstate, 428 uint32_t *desc) 429 { 430 struct pipe_sampler_view *view = &sview->base; 431 struct r600_texture *rtex = (struct r600_texture *)view->texture; 432 bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER; 433 434 if (unlikely(!is_buffer && sview->dcc_incompatible)) { 435 if (vi_dcc_enabled(rtex, view->u.tex.first_level)) 436 if (!si_texture_disable_dcc(&sctx->b, rtex)) 437 sctx->b.decompress_dcc(&sctx->b.b, rtex); 438 439 sview->dcc_incompatible = false; 440 } 441 442 assert(rtex); /* views with texture == NULL aren't supported */ 443 memcpy(desc, sview->state, 8*4); 444 445 if (is_buffer) { 446 si_set_buf_desc_address(&rtex->resource, 447 sview->base.u.buf.offset, 448 desc + 4); 449 } else { 450 bool is_separate_stencil = rtex->db_compatible && 451 sview->is_stencil_sampler; 452 453 si_set_mutable_tex_desc_fields(sctx->screen, rtex, 454 sview->base_level_info, 455 sview->base_level, 456 sview->base.u.tex.first_level, 457 sview->block_width, 458 is_separate_stencil, 459 desc); 460 } 461 462 if (!is_buffer && rtex->fmask.size) { 463 memcpy(desc + 8, sview->fmask_state, 8*4); 464 } else { 465 /* Disable FMASK and bind sampler state in [12:15]. */ 466 memcpy(desc + 8, null_texture_descriptor, 4*4); 467 468 if (sstate) 469 si_set_sampler_state_desc(sstate, sview, 470 is_buffer ? NULL : rtex, 471 desc + 12); 472 } 473 } 474 475 static bool color_needs_decompression(struct r600_texture *rtex) 476 { 477 return rtex->fmask.size || 478 (rtex->dirty_level_mask && 479 (rtex->cmask.size || rtex->dcc_offset)); 480 } 481 482 static bool depth_needs_decompression(struct r600_texture *rtex) 483 { 484 /* If the depth/stencil texture is TC-compatible, no decompression 485 * will be done. The decompression function will only flush DB caches 486 * to make it coherent with shaders. That's necessary because the driver 487 * doesn't flush DB caches in any other case. 488 */ 489 return rtex->db_compatible; 490 } 491 492 static void si_set_sampler_view(struct si_context *sctx, 493 unsigned shader, 494 unsigned slot, struct pipe_sampler_view *view, 495 bool disallow_early_out) 496 { 497 struct si_samplers *samplers = &sctx->samplers[shader]; 498 struct si_sampler_view *rview = (struct si_sampler_view*)view; 499 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); 500 unsigned desc_slot = si_get_sampler_slot(slot); 501 uint32_t *desc = descs->list + desc_slot * 16; 502 503 if (samplers->views[slot] == view && !disallow_early_out) 504 return; 505 506 if (view) { 507 struct r600_texture *rtex = (struct r600_texture *)view->texture; 508 509 si_set_sampler_view_desc(sctx, rview, 510 samplers->sampler_states[slot], desc); 511 512 if (rtex->resource.b.b.target == PIPE_BUFFER) { 513 rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW; 514 samplers->needs_depth_decompress_mask &= ~(1u << slot); 515 samplers->needs_color_decompress_mask &= ~(1u << slot); 516 } else { 517 if (depth_needs_decompression(rtex)) { 518 samplers->needs_depth_decompress_mask |= 1u << slot; 519 } else { 520 samplers->needs_depth_decompress_mask &= ~(1u << slot); 521 } 522 if (color_needs_decompression(rtex)) { 523 samplers->needs_color_decompress_mask |= 1u << slot; 524 } else { 525 samplers->needs_color_decompress_mask &= ~(1u << slot); 526 } 527 528 if (rtex->dcc_offset && 529 p_atomic_read(&rtex->framebuffers_bound)) 530 sctx->need_check_render_feedback = true; 531 } 532 533 pipe_sampler_view_reference(&samplers->views[slot], view); 534 samplers->enabled_mask |= 1u << slot; 535 536 /* Since this can flush, it must be done after enabled_mask is 537 * updated. */ 538 si_sampler_view_add_buffer(sctx, view->texture, 539 RADEON_USAGE_READ, 540 rview->is_stencil_sampler, true); 541 } else { 542 pipe_sampler_view_reference(&samplers->views[slot], NULL); 543 memcpy(desc, null_texture_descriptor, 8*4); 544 /* Only clear the lower dwords of FMASK. */ 545 memcpy(desc + 8, null_texture_descriptor, 4*4); 546 /* Re-set the sampler state if we are transitioning from FMASK. */ 547 if (samplers->sampler_states[slot]) 548 si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL, 549 desc + 12); 550 551 samplers->enabled_mask &= ~(1u << slot); 552 samplers->needs_depth_decompress_mask &= ~(1u << slot); 553 samplers->needs_color_decompress_mask &= ~(1u << slot); 554 } 555 556 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 557 } 558 559 static void si_update_shader_needs_decompress_mask(struct si_context *sctx, 560 unsigned shader) 561 { 562 struct si_samplers *samplers = &sctx->samplers[shader]; 563 unsigned shader_bit = 1 << shader; 564 565 if (samplers->needs_depth_decompress_mask || 566 samplers->needs_color_decompress_mask || 567 sctx->images[shader].needs_color_decompress_mask) 568 sctx->shader_needs_decompress_mask |= shader_bit; 569 else 570 sctx->shader_needs_decompress_mask &= ~shader_bit; 571 } 572 573 static void si_set_sampler_views(struct pipe_context *ctx, 574 enum pipe_shader_type shader, unsigned start, 575 unsigned count, 576 struct pipe_sampler_view **views) 577 { 578 struct si_context *sctx = (struct si_context *)ctx; 579 int i; 580 581 if (!count || shader >= SI_NUM_SHADERS) 582 return; 583 584 if (views) { 585 for (i = 0; i < count; i++) 586 si_set_sampler_view(sctx, shader, start + i, views[i], false); 587 } else { 588 for (i = 0; i < count; i++) 589 si_set_sampler_view(sctx, shader, start + i, NULL, false); 590 } 591 592 si_update_shader_needs_decompress_mask(sctx, shader); 593 } 594 595 static void 596 si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers) 597 { 598 unsigned mask = samplers->enabled_mask; 599 600 while (mask) { 601 int i = u_bit_scan(&mask); 602 struct pipe_resource *res = samplers->views[i]->texture; 603 604 if (res && res->target != PIPE_BUFFER) { 605 struct r600_texture *rtex = (struct r600_texture *)res; 606 607 if (color_needs_decompression(rtex)) { 608 samplers->needs_color_decompress_mask |= 1u << i; 609 } else { 610 samplers->needs_color_decompress_mask &= ~(1u << i); 611 } 612 } 613 } 614 } 615 616 /* IMAGE VIEWS */ 617 618 static void 619 si_release_image_views(struct si_images *images) 620 { 621 unsigned i; 622 623 for (i = 0; i < SI_NUM_IMAGES; ++i) { 624 struct pipe_image_view *view = &images->views[i]; 625 626 pipe_resource_reference(&view->resource, NULL); 627 } 628 } 629 630 static void 631 si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images) 632 { 633 uint mask = images->enabled_mask; 634 635 /* Add buffers to the CS. */ 636 while (mask) { 637 int i = u_bit_scan(&mask); 638 struct pipe_image_view *view = &images->views[i]; 639 640 assert(view->resource); 641 642 si_sampler_view_add_buffer(sctx, view->resource, 643 RADEON_USAGE_READWRITE, false, false); 644 } 645 } 646 647 static void 648 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot) 649 { 650 struct si_images *images = &ctx->images[shader]; 651 652 if (images->enabled_mask & (1u << slot)) { 653 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); 654 unsigned desc_slot = si_get_image_slot(slot); 655 656 pipe_resource_reference(&images->views[slot].resource, NULL); 657 images->needs_color_decompress_mask &= ~(1 << slot); 658 659 memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4); 660 images->enabled_mask &= ~(1u << slot); 661 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 662 } 663 } 664 665 static void 666 si_mark_image_range_valid(const struct pipe_image_view *view) 667 { 668 struct r600_resource *res = (struct r600_resource *)view->resource; 669 670 assert(res && res->b.b.target == PIPE_BUFFER); 671 672 util_range_add(&res->valid_buffer_range, 673 view->u.buf.offset, 674 view->u.buf.offset + view->u.buf.size); 675 } 676 677 static void si_set_shader_image_desc(struct si_context *ctx, 678 const struct pipe_image_view *view, 679 bool skip_decompress, 680 uint32_t *desc) 681 { 682 struct si_screen *screen = ctx->screen; 683 struct r600_resource *res; 684 685 res = (struct r600_resource *)view->resource; 686 687 if (res->b.b.target == PIPE_BUFFER) { 688 if (view->access & PIPE_IMAGE_ACCESS_WRITE) 689 si_mark_image_range_valid(view); 690 691 si_make_buffer_descriptor(screen, res, 692 view->format, 693 view->u.buf.offset, 694 view->u.buf.size, desc); 695 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4); 696 } else { 697 static const unsigned char swizzle[4] = { 0, 1, 2, 3 }; 698 struct r600_texture *tex = (struct r600_texture *)res; 699 unsigned level = view->u.tex.level; 700 unsigned width, height, depth, hw_level; 701 bool uses_dcc = vi_dcc_enabled(tex, level); 702 unsigned access = view->access; 703 704 /* Clear the write flag when writes can't occur. 705 * Note that DCC_DECOMPRESS for MSAA doesn't work in some cases, 706 * so we don't wanna trigger it. 707 */ 708 if (tex->is_depth || tex->resource.b.b.nr_samples >= 2) { 709 assert(!"Z/S and MSAA image stores are not supported"); 710 access &= ~PIPE_IMAGE_ACCESS_WRITE; 711 } 712 713 assert(!tex->is_depth); 714 assert(tex->fmask.size == 0); 715 716 if (uses_dcc && !skip_decompress && 717 (view->access & PIPE_IMAGE_ACCESS_WRITE || 718 !vi_dcc_formats_compatible(res->b.b.format, view->format))) { 719 /* If DCC can't be disabled, at least decompress it. 720 * The decompression is relatively cheap if the surface 721 * has been decompressed already. 722 */ 723 if (!si_texture_disable_dcc(&ctx->b, tex)) 724 ctx->b.decompress_dcc(&ctx->b.b, tex); 725 } 726 727 if (ctx->b.chip_class >= GFX9) { 728 /* Always set the base address. The swizzle modes don't 729 * allow setting mipmap level offsets as the base. 730 */ 731 width = res->b.b.width0; 732 height = res->b.b.height0; 733 depth = res->b.b.depth0; 734 hw_level = level; 735 } else { 736 /* Always force the base level to the selected level. 737 * 738 * This is required for 3D textures, where otherwise 739 * selecting a single slice for non-layered bindings 740 * fails. It doesn't hurt the other targets. 741 */ 742 width = u_minify(res->b.b.width0, level); 743 height = u_minify(res->b.b.height0, level); 744 depth = u_minify(res->b.b.depth0, level); 745 hw_level = 0; 746 } 747 748 si_make_texture_descriptor(screen, tex, 749 false, res->b.b.target, 750 view->format, swizzle, 751 hw_level, hw_level, 752 view->u.tex.first_layer, 753 view->u.tex.last_layer, 754 width, height, depth, 755 desc, NULL); 756 si_set_mutable_tex_desc_fields(screen, tex, 757 &tex->surface.u.legacy.level[level], 758 level, level, 759 util_format_get_blockwidth(view->format), 760 false, desc); 761 } 762 } 763 764 static void si_set_shader_image(struct si_context *ctx, 765 unsigned shader, 766 unsigned slot, const struct pipe_image_view *view, 767 bool skip_decompress) 768 { 769 struct si_images *images = &ctx->images[shader]; 770 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); 771 struct r600_resource *res; 772 unsigned desc_slot = si_get_image_slot(slot); 773 uint32_t *desc = descs->list + desc_slot * 8; 774 775 if (!view || !view->resource) { 776 si_disable_shader_image(ctx, shader, slot); 777 return; 778 } 779 780 res = (struct r600_resource *)view->resource; 781 782 if (&images->views[slot] != view) 783 util_copy_image_view(&images->views[slot], view); 784 785 si_set_shader_image_desc(ctx, view, skip_decompress, desc); 786 787 if (res->b.b.target == PIPE_BUFFER) { 788 images->needs_color_decompress_mask &= ~(1 << slot); 789 res->bind_history |= PIPE_BIND_SHADER_IMAGE; 790 } else { 791 struct r600_texture *tex = (struct r600_texture *)res; 792 unsigned level = view->u.tex.level; 793 794 if (color_needs_decompression(tex)) { 795 images->needs_color_decompress_mask |= 1 << slot; 796 } else { 797 images->needs_color_decompress_mask &= ~(1 << slot); 798 } 799 800 if (vi_dcc_enabled(tex, level) && 801 p_atomic_read(&tex->framebuffers_bound)) 802 ctx->need_check_render_feedback = true; 803 } 804 805 images->enabled_mask |= 1u << slot; 806 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 807 808 /* Since this can flush, it must be done after enabled_mask is updated. */ 809 si_sampler_view_add_buffer(ctx, &res->b.b, 810 (view->access & PIPE_IMAGE_ACCESS_WRITE) ? 811 RADEON_USAGE_READWRITE : RADEON_USAGE_READ, 812 false, true); 813 } 814 815 static void 816 si_set_shader_images(struct pipe_context *pipe, 817 enum pipe_shader_type shader, 818 unsigned start_slot, unsigned count, 819 const struct pipe_image_view *views) 820 { 821 struct si_context *ctx = (struct si_context *)pipe; 822 unsigned i, slot; 823 824 assert(shader < SI_NUM_SHADERS); 825 826 if (!count) 827 return; 828 829 assert(start_slot + count <= SI_NUM_IMAGES); 830 831 if (views) { 832 for (i = 0, slot = start_slot; i < count; ++i, ++slot) 833 si_set_shader_image(ctx, shader, slot, &views[i], false); 834 } else { 835 for (i = 0, slot = start_slot; i < count; ++i, ++slot) 836 si_set_shader_image(ctx, shader, slot, NULL, false); 837 } 838 839 si_update_shader_needs_decompress_mask(ctx, shader); 840 } 841 842 static void 843 si_images_update_needs_color_decompress_mask(struct si_images *images) 844 { 845 unsigned mask = images->enabled_mask; 846 847 while (mask) { 848 int i = u_bit_scan(&mask); 849 struct pipe_resource *res = images->views[i].resource; 850 851 if (res && res->target != PIPE_BUFFER) { 852 struct r600_texture *rtex = (struct r600_texture *)res; 853 854 if (color_needs_decompression(rtex)) { 855 images->needs_color_decompress_mask |= 1 << i; 856 } else { 857 images->needs_color_decompress_mask &= ~(1 << i); 858 } 859 } 860 } 861 } 862 863 /* SAMPLER STATES */ 864 865 static void si_bind_sampler_states(struct pipe_context *ctx, 866 enum pipe_shader_type shader, 867 unsigned start, unsigned count, void **states) 868 { 869 struct si_context *sctx = (struct si_context *)ctx; 870 struct si_samplers *samplers = &sctx->samplers[shader]; 871 struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader); 872 struct si_sampler_state **sstates = (struct si_sampler_state**)states; 873 int i; 874 875 if (!count || shader >= SI_NUM_SHADERS) 876 return; 877 878 for (i = 0; i < count; i++) { 879 unsigned slot = start + i; 880 unsigned desc_slot = si_get_sampler_slot(slot); 881 882 if (!sstates[i] || 883 sstates[i] == samplers->sampler_states[slot]) 884 continue; 885 886 #ifdef DEBUG 887 assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC); 888 #endif 889 samplers->sampler_states[slot] = sstates[i]; 890 891 /* If FMASK is bound, don't overwrite it. 892 * The sampler state will be set after FMASK is unbound. 893 */ 894 struct si_sampler_view *sview = 895 (struct si_sampler_view *)samplers->views[slot]; 896 897 struct r600_texture *tex = NULL; 898 899 if (sview && sview->base.texture && 900 sview->base.texture->target != PIPE_BUFFER) 901 tex = (struct r600_texture *)sview->base.texture; 902 903 if (tex && tex->fmask.size) 904 continue; 905 906 si_set_sampler_state_desc(sstates[i], sview, tex, 907 desc->list + desc_slot * 16 + 12); 908 909 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 910 } 911 } 912 913 /* BUFFER RESOURCES */ 914 915 static void si_init_buffer_resources(struct si_buffer_resources *buffers, 916 struct si_descriptors *descs, 917 unsigned num_buffers, 918 unsigned shader_userdata_index, 919 enum radeon_bo_usage shader_usage, 920 enum radeon_bo_usage shader_usage_constbuf, 921 enum radeon_bo_priority priority, 922 enum radeon_bo_priority priority_constbuf) 923 { 924 buffers->shader_usage = shader_usage; 925 buffers->shader_usage_constbuf = shader_usage_constbuf; 926 buffers->priority = priority; 927 buffers->priority_constbuf = priority_constbuf; 928 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); 929 930 si_init_descriptors(descs, shader_userdata_index, 4, num_buffers); 931 } 932 933 static void si_release_buffer_resources(struct si_buffer_resources *buffers, 934 struct si_descriptors *descs) 935 { 936 int i; 937 938 for (i = 0; i < descs->num_elements; i++) { 939 pipe_resource_reference(&buffers->buffers[i], NULL); 940 } 941 942 FREE(buffers->buffers); 943 } 944 945 static void si_buffer_resources_begin_new_cs(struct si_context *sctx, 946 struct si_buffer_resources *buffers) 947 { 948 unsigned mask = buffers->enabled_mask; 949 950 /* Add buffers to the CS. */ 951 while (mask) { 952 int i = u_bit_scan(&mask); 953 954 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 955 r600_resource(buffers->buffers[i]), 956 i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage : 957 buffers->shader_usage_constbuf, 958 i < SI_NUM_SHADER_BUFFERS ? buffers->priority : 959 buffers->priority_constbuf); 960 } 961 } 962 963 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers, 964 struct si_descriptors *descs, 965 unsigned idx, struct pipe_resource **buf, 966 unsigned *offset, unsigned *size) 967 { 968 pipe_resource_reference(buf, buffers->buffers[idx]); 969 if (*buf) { 970 struct r600_resource *res = r600_resource(*buf); 971 const uint32_t *desc = descs->list + idx * 4; 972 uint64_t va; 973 974 *size = desc[2]; 975 976 assert(G_008F04_STRIDE(desc[1]) == 0); 977 va = ((uint64_t)desc[1] << 32) | desc[0]; 978 979 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size); 980 *offset = va - res->gpu_address; 981 } 982 } 983 984 /* VERTEX BUFFERS */ 985 986 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) 987 { 988 struct si_descriptors *desc = &sctx->vertex_buffers; 989 int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0; 990 int i; 991 992 for (i = 0; i < count; i++) { 993 int vb = sctx->vertex_elements->vertex_buffer_index[i]; 994 995 if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) 996 continue; 997 if (!sctx->vertex_buffer[vb].buffer.resource) 998 continue; 999 1000 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 1001 (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource, 1002 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); 1003 } 1004 1005 if (!desc->buffer) 1006 return; 1007 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 1008 desc->buffer, RADEON_USAGE_READ, 1009 RADEON_PRIO_DESCRIPTORS); 1010 } 1011 1012 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) 1013 { 1014 struct si_vertex_elements *velems = sctx->vertex_elements; 1015 struct si_descriptors *desc = &sctx->vertex_buffers; 1016 unsigned i, count; 1017 unsigned desc_list_byte_size; 1018 unsigned first_vb_use_mask; 1019 uint32_t *ptr; 1020 1021 if (!sctx->vertex_buffers_dirty || !velems) 1022 return true; 1023 1024 count = velems->count; 1025 1026 if (!count) 1027 return true; 1028 1029 desc_list_byte_size = velems->desc_list_byte_size; 1030 first_vb_use_mask = velems->first_vb_use_mask; 1031 1032 /* Vertex buffer descriptors are the only ones which are uploaded 1033 * directly through a staging buffer and don't go through 1034 * the fine-grained upload path. 1035 */ 1036 unsigned buffer_offset = 0; 1037 u_upload_alloc(sctx->b.b.const_uploader, 0, 1038 desc_list_byte_size, 1039 si_optimal_tcc_alignment(sctx, desc_list_byte_size), 1040 &buffer_offset, 1041 (struct pipe_resource**)&desc->buffer, (void**)&ptr); 1042 if (!desc->buffer) { 1043 desc->gpu_address = 0; 1044 return false; 1045 } 1046 1047 desc->gpu_address = desc->buffer->gpu_address + buffer_offset; 1048 desc->list = ptr; 1049 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 1050 desc->buffer, RADEON_USAGE_READ, 1051 RADEON_PRIO_DESCRIPTORS); 1052 1053 assert(count <= SI_MAX_ATTRIBS); 1054 1055 for (i = 0; i < count; i++) { 1056 struct pipe_vertex_buffer *vb; 1057 struct r600_resource *rbuffer; 1058 unsigned vbo_index = velems->vertex_buffer_index[i]; 1059 uint32_t *desc = &ptr[i*4]; 1060 1061 vb = &sctx->vertex_buffer[vbo_index]; 1062 rbuffer = (struct r600_resource*)vb->buffer.resource; 1063 if (!rbuffer) { 1064 memset(desc, 0, 16); 1065 continue; 1066 } 1067 1068 int64_t offset = (int64_t)((int)vb->buffer_offset) + 1069 velems->src_offset[i]; 1070 uint64_t va = rbuffer->gpu_address + offset; 1071 1072 int64_t num_records = (int64_t)rbuffer->b.b.width0 - offset; 1073 if (sctx->b.chip_class != VI && vb->stride) { 1074 /* Round up by rounding down and adding 1 */ 1075 num_records = (num_records - velems->format_size[i]) / 1076 vb->stride + 1; 1077 } 1078 assert(num_records >= 0 && num_records <= UINT_MAX); 1079 1080 desc[0] = va; 1081 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 1082 S_008F04_STRIDE(vb->stride); 1083 desc[2] = num_records; 1084 desc[3] = velems->rsrc_word3[i]; 1085 1086 if (first_vb_use_mask & (1 << i)) { 1087 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 1088 (struct r600_resource*)vb->buffer.resource, 1089 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); 1090 } 1091 } 1092 1093 /* Don't flush the const cache. It would have a very negative effect 1094 * on performance (confirmed by testing). New descriptors are always 1095 * uploaded to a fresh new buffer, so I don't think flushing the const 1096 * cache is needed. */ 1097 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); 1098 sctx->vertex_buffers_dirty = false; 1099 sctx->vertex_buffer_pointer_dirty = true; 1100 sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS; 1101 return true; 1102 } 1103 1104 1105 /* CONSTANT BUFFERS */ 1106 1107 static unsigned 1108 si_const_and_shader_buffer_descriptors_idx(unsigned shader) 1109 { 1110 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + 1111 SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS; 1112 } 1113 1114 static struct si_descriptors * 1115 si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader) 1116 { 1117 return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)]; 1118 } 1119 1120 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, 1121 const uint8_t *ptr, unsigned size, uint32_t *const_offset) 1122 { 1123 void *tmp; 1124 1125 u_upload_alloc(sctx->b.b.const_uploader, 0, size, 1126 si_optimal_tcc_alignment(sctx, size), 1127 const_offset, 1128 (struct pipe_resource**)rbuffer, &tmp); 1129 if (*rbuffer) 1130 util_memcpy_cpu_to_le32(tmp, ptr, size); 1131 } 1132 1133 static void si_set_constant_buffer(struct si_context *sctx, 1134 struct si_buffer_resources *buffers, 1135 unsigned descriptors_idx, 1136 uint slot, const struct pipe_constant_buffer *input) 1137 { 1138 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; 1139 assert(slot < descs->num_elements); 1140 pipe_resource_reference(&buffers->buffers[slot], NULL); 1141 1142 /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy 1143 * with a NULL buffer). We need to use a dummy buffer instead. */ 1144 if (sctx->b.chip_class == CIK && 1145 (!input || (!input->buffer && !input->user_buffer))) 1146 input = &sctx->null_const_buf; 1147 1148 if (input && (input->buffer || input->user_buffer)) { 1149 struct pipe_resource *buffer = NULL; 1150 uint64_t va; 1151 1152 /* Upload the user buffer if needed. */ 1153 if (input->user_buffer) { 1154 unsigned buffer_offset; 1155 1156 si_upload_const_buffer(sctx, 1157 (struct r600_resource**)&buffer, input->user_buffer, 1158 input->buffer_size, &buffer_offset); 1159 if (!buffer) { 1160 /* Just unbind on failure. */ 1161 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL); 1162 return; 1163 } 1164 va = r600_resource(buffer)->gpu_address + buffer_offset; 1165 } else { 1166 pipe_resource_reference(&buffer, input->buffer); 1167 va = r600_resource(buffer)->gpu_address + input->buffer_offset; 1168 /* Only track usage for non-user buffers. */ 1169 r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER; 1170 } 1171 1172 /* Set the descriptor. */ 1173 uint32_t *desc = descs->list + slot*4; 1174 desc[0] = va; 1175 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 1176 S_008F04_STRIDE(0); 1177 desc[2] = input->buffer_size; 1178 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 1179 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1180 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 1181 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 1182 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 1183 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 1184 1185 buffers->buffers[slot] = buffer; 1186 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 1187 (struct r600_resource*)buffer, 1188 buffers->shader_usage_constbuf, 1189 buffers->priority_constbuf, true); 1190 buffers->enabled_mask |= 1u << slot; 1191 } else { 1192 /* Clear the descriptor. */ 1193 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4); 1194 buffers->enabled_mask &= ~(1u << slot); 1195 } 1196 1197 sctx->descriptors_dirty |= 1u << descriptors_idx; 1198 } 1199 1200 void si_set_rw_buffer(struct si_context *sctx, 1201 uint slot, const struct pipe_constant_buffer *input) 1202 { 1203 si_set_constant_buffer(sctx, &sctx->rw_buffers, 1204 SI_DESCS_RW_BUFFERS, slot, input); 1205 } 1206 1207 static void si_pipe_set_constant_buffer(struct pipe_context *ctx, 1208 enum pipe_shader_type shader, uint slot, 1209 const struct pipe_constant_buffer *input) 1210 { 1211 struct si_context *sctx = (struct si_context *)ctx; 1212 1213 if (shader >= SI_NUM_SHADERS) 1214 return; 1215 1216 slot = si_get_constbuf_slot(slot); 1217 si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader], 1218 si_const_and_shader_buffer_descriptors_idx(shader), 1219 slot, input); 1220 } 1221 1222 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, 1223 uint slot, struct pipe_constant_buffer *cbuf) 1224 { 1225 cbuf->user_buffer = NULL; 1226 si_get_buffer_from_descriptors( 1227 &sctx->const_and_shader_buffers[shader], 1228 si_const_and_shader_buffer_descriptors(sctx, shader), 1229 si_get_constbuf_slot(slot), 1230 &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size); 1231 } 1232 1233 /* SHADER BUFFERS */ 1234 1235 static void si_set_shader_buffers(struct pipe_context *ctx, 1236 enum pipe_shader_type shader, 1237 unsigned start_slot, unsigned count, 1238 const struct pipe_shader_buffer *sbuffers) 1239 { 1240 struct si_context *sctx = (struct si_context *)ctx; 1241 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader]; 1242 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader); 1243 unsigned i; 1244 1245 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); 1246 1247 for (i = 0; i < count; ++i) { 1248 const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL; 1249 struct r600_resource *buf; 1250 unsigned slot = si_get_shaderbuf_slot(start_slot + i); 1251 uint32_t *desc = descs->list + slot * 4; 1252 uint64_t va; 1253 1254 if (!sbuffer || !sbuffer->buffer) { 1255 pipe_resource_reference(&buffers->buffers[slot], NULL); 1256 memset(desc, 0, sizeof(uint32_t) * 4); 1257 buffers->enabled_mask &= ~(1u << slot); 1258 sctx->descriptors_dirty |= 1259 1u << si_const_and_shader_buffer_descriptors_idx(shader); 1260 continue; 1261 } 1262 1263 buf = (struct r600_resource *)sbuffer->buffer; 1264 va = buf->gpu_address + sbuffer->buffer_offset; 1265 1266 desc[0] = va; 1267 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 1268 S_008F04_STRIDE(0); 1269 desc[2] = sbuffer->buffer_size; 1270 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 1271 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1272 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 1273 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 1274 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 1275 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 1276 1277 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); 1278 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf, 1279 buffers->shader_usage, 1280 buffers->priority, true); 1281 buf->bind_history |= PIPE_BIND_SHADER_BUFFER; 1282 1283 buffers->enabled_mask |= 1u << slot; 1284 sctx->descriptors_dirty |= 1285 1u << si_const_and_shader_buffer_descriptors_idx(shader); 1286 1287 util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset, 1288 sbuffer->buffer_offset + sbuffer->buffer_size); 1289 } 1290 } 1291 1292 void si_get_shader_buffers(struct si_context *sctx, 1293 enum pipe_shader_type shader, 1294 uint start_slot, uint count, 1295 struct pipe_shader_buffer *sbuf) 1296 { 1297 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader]; 1298 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader); 1299 1300 for (unsigned i = 0; i < count; ++i) { 1301 si_get_buffer_from_descriptors( 1302 buffers, descs, 1303 si_get_shaderbuf_slot(start_slot + i), 1304 &sbuf[i].buffer, &sbuf[i].buffer_offset, 1305 &sbuf[i].buffer_size); 1306 } 1307 } 1308 1309 /* RING BUFFERS */ 1310 1311 void si_set_ring_buffer(struct pipe_context *ctx, uint slot, 1312 struct pipe_resource *buffer, 1313 unsigned stride, unsigned num_records, 1314 bool add_tid, bool swizzle, 1315 unsigned element_size, unsigned index_stride, uint64_t offset) 1316 { 1317 struct si_context *sctx = (struct si_context *)ctx; 1318 struct si_buffer_resources *buffers = &sctx->rw_buffers; 1319 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; 1320 1321 /* The stride field in the resource descriptor has 14 bits */ 1322 assert(stride < (1 << 14)); 1323 1324 assert(slot < descs->num_elements); 1325 pipe_resource_reference(&buffers->buffers[slot], NULL); 1326 1327 if (buffer) { 1328 uint64_t va; 1329 1330 va = r600_resource(buffer)->gpu_address + offset; 1331 1332 switch (element_size) { 1333 default: 1334 assert(!"Unsupported ring buffer element size"); 1335 case 0: 1336 case 2: 1337 element_size = 0; 1338 break; 1339 case 4: 1340 element_size = 1; 1341 break; 1342 case 8: 1343 element_size = 2; 1344 break; 1345 case 16: 1346 element_size = 3; 1347 break; 1348 } 1349 1350 switch (index_stride) { 1351 default: 1352 assert(!"Unsupported ring buffer index stride"); 1353 case 0: 1354 case 8: 1355 index_stride = 0; 1356 break; 1357 case 16: 1358 index_stride = 1; 1359 break; 1360 case 32: 1361 index_stride = 2; 1362 break; 1363 case 64: 1364 index_stride = 3; 1365 break; 1366 } 1367 1368 if (sctx->b.chip_class >= VI && stride) 1369 num_records *= stride; 1370 1371 /* Set the descriptor. */ 1372 uint32_t *desc = descs->list + slot*4; 1373 desc[0] = va; 1374 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 1375 S_008F04_STRIDE(stride) | 1376 S_008F04_SWIZZLE_ENABLE(swizzle); 1377 desc[2] = num_records; 1378 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 1379 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1380 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 1381 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 1382 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 1383 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | 1384 S_008F0C_INDEX_STRIDE(index_stride) | 1385 S_008F0C_ADD_TID_ENABLE(add_tid); 1386 1387 if (sctx->b.chip_class >= GFX9) 1388 assert(!swizzle || element_size == 1); /* always 4 bytes on GFX9 */ 1389 else 1390 desc[3] |= S_008F0C_ELEMENT_SIZE(element_size); 1391 1392 pipe_resource_reference(&buffers->buffers[slot], buffer); 1393 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 1394 (struct r600_resource*)buffer, 1395 buffers->shader_usage, buffers->priority); 1396 buffers->enabled_mask |= 1u << slot; 1397 } else { 1398 /* Clear the descriptor. */ 1399 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4); 1400 buffers->enabled_mask &= ~(1u << slot); 1401 } 1402 1403 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; 1404 } 1405 1406 static void si_desc_reset_buffer_offset(struct pipe_context *ctx, 1407 uint32_t *desc, uint64_t old_buf_va, 1408 struct pipe_resource *new_buf) 1409 { 1410 /* Retrieve the buffer offset from the descriptor. */ 1411 uint64_t old_desc_va = si_desc_extract_buffer_address(desc); 1412 1413 assert(old_buf_va <= old_desc_va); 1414 uint64_t offset_within_buffer = old_desc_va - old_buf_va; 1415 1416 /* Update the descriptor. */ 1417 si_set_buf_desc_address(r600_resource(new_buf), offset_within_buffer, 1418 desc); 1419 } 1420 1421 /* INTERNAL CONST BUFFERS */ 1422 1423 static void si_set_polygon_stipple(struct pipe_context *ctx, 1424 const struct pipe_poly_stipple *state) 1425 { 1426 struct si_context *sctx = (struct si_context *)ctx; 1427 struct pipe_constant_buffer cb = {}; 1428 unsigned stipple[32]; 1429 int i; 1430 1431 for (i = 0; i < 32; i++) 1432 stipple[i] = util_bitreverse(state->stipple[i]); 1433 1434 cb.user_buffer = stipple; 1435 cb.buffer_size = sizeof(stipple); 1436 1437 si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb); 1438 } 1439 1440 /* TEXTURE METADATA ENABLE/DISABLE */ 1441 1442 static void 1443 si_resident_handles_update_needs_color_decompress(struct si_context *sctx) 1444 { 1445 util_dynarray_clear(&sctx->resident_tex_needs_color_decompress); 1446 util_dynarray_clear(&sctx->resident_img_needs_color_decompress); 1447 1448 util_dynarray_foreach(&sctx->resident_tex_handles, 1449 struct si_texture_handle *, tex_handle) { 1450 struct pipe_resource *res = (*tex_handle)->view->texture; 1451 struct r600_texture *rtex; 1452 1453 if (!res || res->target == PIPE_BUFFER) 1454 continue; 1455 1456 rtex = (struct r600_texture *)res; 1457 if (!color_needs_decompression(rtex)) 1458 continue; 1459 1460 util_dynarray_append(&sctx->resident_tex_needs_color_decompress, 1461 struct si_texture_handle *, *tex_handle); 1462 } 1463 1464 util_dynarray_foreach(&sctx->resident_img_handles, 1465 struct si_image_handle *, img_handle) { 1466 struct pipe_image_view *view = &(*img_handle)->view; 1467 struct pipe_resource *res = view->resource; 1468 struct r600_texture *rtex; 1469 1470 if (!res || res->target == PIPE_BUFFER) 1471 continue; 1472 1473 rtex = (struct r600_texture *)res; 1474 if (!color_needs_decompression(rtex)) 1475 continue; 1476 1477 util_dynarray_append(&sctx->resident_img_needs_color_decompress, 1478 struct si_image_handle *, *img_handle); 1479 } 1480 } 1481 1482 /* CMASK can be enabled (for fast clear) and disabled (for texture export) 1483 * while the texture is bound, possibly by a different context. In that case, 1484 * call this function to update needs_*_decompress_masks. 1485 */ 1486 void si_update_needs_color_decompress_masks(struct si_context *sctx) 1487 { 1488 for (int i = 0; i < SI_NUM_SHADERS; ++i) { 1489 si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]); 1490 si_images_update_needs_color_decompress_mask(&sctx->images[i]); 1491 si_update_shader_needs_decompress_mask(sctx, i); 1492 } 1493 1494 si_resident_handles_update_needs_color_decompress(sctx); 1495 } 1496 1497 /* BUFFER DISCARD/INVALIDATION */ 1498 1499 /** Reset descriptors of buffer resources after \p buf has been invalidated. */ 1500 static void si_reset_buffer_resources(struct si_context *sctx, 1501 struct si_buffer_resources *buffers, 1502 unsigned descriptors_idx, 1503 unsigned slot_mask, 1504 struct pipe_resource *buf, 1505 uint64_t old_va, 1506 enum radeon_bo_usage usage, 1507 enum radeon_bo_priority priority) 1508 { 1509 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; 1510 unsigned mask = buffers->enabled_mask & slot_mask; 1511 1512 while (mask) { 1513 unsigned i = u_bit_scan(&mask); 1514 if (buffers->buffers[i] == buf) { 1515 si_desc_reset_buffer_offset(&sctx->b.b, 1516 descs->list + i*4, 1517 old_va, buf); 1518 sctx->descriptors_dirty |= 1u << descriptors_idx; 1519 1520 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 1521 (struct r600_resource *)buf, 1522 usage, priority, true); 1523 } 1524 } 1525 } 1526 1527 static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf, 1528 uint64_t old_va) 1529 { 1530 struct si_context *sctx = (struct si_context*)ctx; 1531 struct r600_resource *rbuffer = r600_resource(buf); 1532 unsigned i, shader; 1533 unsigned num_elems = sctx->vertex_elements ? 1534 sctx->vertex_elements->count : 0; 1535 1536 /* We changed the buffer, now we need to bind it where the old one 1537 * was bound. This consists of 2 things: 1538 * 1) Updating the resource descriptor and dirtying it. 1539 * 2) Adding a relocation to the CS, so that it's usable. 1540 */ 1541 1542 /* Vertex buffers. */ 1543 if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { 1544 for (i = 0; i < num_elems; i++) { 1545 int vb = sctx->vertex_elements->vertex_buffer_index[i]; 1546 1547 if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) 1548 continue; 1549 if (!sctx->vertex_buffer[vb].buffer.resource) 1550 continue; 1551 1552 if (sctx->vertex_buffer[vb].buffer.resource == buf) { 1553 sctx->vertex_buffers_dirty = true; 1554 break; 1555 } 1556 } 1557 } 1558 1559 /* Streamout buffers. (other internal buffers can't be invalidated) */ 1560 if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { 1561 for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) { 1562 struct si_buffer_resources *buffers = &sctx->rw_buffers; 1563 struct si_descriptors *descs = 1564 &sctx->descriptors[SI_DESCS_RW_BUFFERS]; 1565 1566 if (buffers->buffers[i] != buf) 1567 continue; 1568 1569 si_desc_reset_buffer_offset(ctx, descs->list + i*4, 1570 old_va, buf); 1571 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; 1572 1573 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 1574 rbuffer, buffers->shader_usage, 1575 RADEON_PRIO_SHADER_RW_BUFFER, 1576 true); 1577 1578 /* Update the streamout state. */ 1579 if (sctx->streamout.begin_emitted) 1580 si_emit_streamout_end(sctx); 1581 sctx->streamout.append_bitmask = 1582 sctx->streamout.enabled_mask; 1583 si_streamout_buffers_dirty(sctx); 1584 } 1585 } 1586 1587 /* Constant and shader buffers. */ 1588 if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { 1589 for (shader = 0; shader < SI_NUM_SHADERS; shader++) 1590 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], 1591 si_const_and_shader_buffer_descriptors_idx(shader), 1592 u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), 1593 buf, old_va, 1594 sctx->const_and_shader_buffers[shader].shader_usage_constbuf, 1595 sctx->const_and_shader_buffers[shader].priority_constbuf); 1596 } 1597 1598 if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) { 1599 for (shader = 0; shader < SI_NUM_SHADERS; shader++) 1600 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], 1601 si_const_and_shader_buffer_descriptors_idx(shader), 1602 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), 1603 buf, old_va, 1604 sctx->const_and_shader_buffers[shader].shader_usage, 1605 sctx->const_and_shader_buffers[shader].priority); 1606 } 1607 1608 if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { 1609 /* Texture buffers - update bindings. */ 1610 for (shader = 0; shader < SI_NUM_SHADERS; shader++) { 1611 struct si_samplers *samplers = &sctx->samplers[shader]; 1612 struct si_descriptors *descs = 1613 si_sampler_and_image_descriptors(sctx, shader); 1614 unsigned mask = samplers->enabled_mask; 1615 1616 while (mask) { 1617 unsigned i = u_bit_scan(&mask); 1618 if (samplers->views[i]->texture == buf) { 1619 unsigned desc_slot = si_get_sampler_slot(i); 1620 1621 si_desc_reset_buffer_offset(ctx, 1622 descs->list + 1623 desc_slot * 16 + 4, 1624 old_va, buf); 1625 sctx->descriptors_dirty |= 1626 1u << si_sampler_and_image_descriptors_idx(shader); 1627 1628 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 1629 rbuffer, RADEON_USAGE_READ, 1630 RADEON_PRIO_SAMPLER_BUFFER, 1631 true); 1632 } 1633 } 1634 } 1635 } 1636 1637 /* Shader images */ 1638 if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) { 1639 for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { 1640 struct si_images *images = &sctx->images[shader]; 1641 struct si_descriptors *descs = 1642 si_sampler_and_image_descriptors(sctx, shader); 1643 unsigned mask = images->enabled_mask; 1644 1645 while (mask) { 1646 unsigned i = u_bit_scan(&mask); 1647 1648 if (images->views[i].resource == buf) { 1649 unsigned desc_slot = si_get_image_slot(i); 1650 1651 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE) 1652 si_mark_image_range_valid(&images->views[i]); 1653 1654 si_desc_reset_buffer_offset( 1655 ctx, descs->list + desc_slot * 8 + 4, 1656 old_va, buf); 1657 sctx->descriptors_dirty |= 1658 1u << si_sampler_and_image_descriptors_idx(shader); 1659 1660 radeon_add_to_buffer_list_check_mem( 1661 &sctx->b, &sctx->b.gfx, rbuffer, 1662 RADEON_USAGE_READWRITE, 1663 RADEON_PRIO_SAMPLER_BUFFER, true); 1664 } 1665 } 1666 } 1667 } 1668 1669 /* Bindless texture handles */ 1670 if (rbuffer->texture_handle_allocated) { 1671 struct si_descriptors *descs = &sctx->bindless_descriptors; 1672 1673 util_dynarray_foreach(&sctx->resident_tex_handles, 1674 struct si_texture_handle *, tex_handle) { 1675 struct pipe_sampler_view *view = (*tex_handle)->view; 1676 unsigned desc_slot = (*tex_handle)->desc_slot; 1677 1678 if (view->texture == buf) { 1679 si_set_buf_desc_address(rbuffer, 1680 view->u.buf.offset, 1681 descs->list + 1682 desc_slot * 16 + 4); 1683 1684 (*tex_handle)->desc_dirty = true; 1685 sctx->bindless_descriptors_dirty = true; 1686 1687 radeon_add_to_buffer_list_check_mem( 1688 &sctx->b, &sctx->b.gfx, rbuffer, 1689 RADEON_USAGE_READ, 1690 RADEON_PRIO_SAMPLER_BUFFER, true); 1691 } 1692 } 1693 } 1694 1695 /* Bindless image handles */ 1696 if (rbuffer->image_handle_allocated) { 1697 struct si_descriptors *descs = &sctx->bindless_descriptors; 1698 1699 util_dynarray_foreach(&sctx->resident_img_handles, 1700 struct si_image_handle *, img_handle) { 1701 struct pipe_image_view *view = &(*img_handle)->view; 1702 unsigned desc_slot = (*img_handle)->desc_slot; 1703 1704 if (view->resource == buf) { 1705 if (view->access & PIPE_IMAGE_ACCESS_WRITE) 1706 si_mark_image_range_valid(view); 1707 1708 si_set_buf_desc_address(rbuffer, 1709 view->u.buf.offset, 1710 descs->list + 1711 desc_slot * 16 + 4); 1712 1713 (*img_handle)->desc_dirty = true; 1714 sctx->bindless_descriptors_dirty = true; 1715 1716 radeon_add_to_buffer_list_check_mem( 1717 &sctx->b, &sctx->b.gfx, rbuffer, 1718 RADEON_USAGE_READWRITE, 1719 RADEON_PRIO_SAMPLER_BUFFER, true); 1720 } 1721 } 1722 } 1723 } 1724 1725 /* Reallocate a buffer a update all resource bindings where the buffer is 1726 * bound. 1727 * 1728 * This is used to avoid CPU-GPU synchronizations, because it makes the buffer 1729 * idle by discarding its contents. Apps usually tell us when to do this using 1730 * map_buffer flags, for example. 1731 */ 1732 static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf) 1733 { 1734 struct si_context *sctx = (struct si_context*)ctx; 1735 struct r600_resource *rbuffer = r600_resource(buf); 1736 uint64_t old_va = rbuffer->gpu_address; 1737 1738 /* Reallocate the buffer in the same pipe_resource. */ 1739 si_alloc_resource(sctx->screen, rbuffer); 1740 1741 si_rebind_buffer(ctx, buf, old_va); 1742 } 1743 1744 static void si_upload_bindless_descriptor(struct si_context *sctx, 1745 unsigned desc_slot, 1746 unsigned num_dwords) 1747 { 1748 struct si_descriptors *desc = &sctx->bindless_descriptors; 1749 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 1750 unsigned desc_slot_offset = desc_slot * 16; 1751 uint32_t *data; 1752 uint64_t va; 1753 1754 data = desc->list + desc_slot_offset; 1755 va = desc->gpu_address + desc_slot_offset * 4; 1756 1757 radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0)); 1758 radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) | 1759 S_370_WR_CONFIRM(1) | 1760 S_370_ENGINE_SEL(V_370_ME)); 1761 radeon_emit(cs, va); 1762 radeon_emit(cs, va >> 32); 1763 radeon_emit_array(cs, data, num_dwords); 1764 } 1765 1766 static void si_upload_bindless_descriptors(struct si_context *sctx) 1767 { 1768 if (!sctx->bindless_descriptors_dirty) 1769 return; 1770 1771 /* Wait for graphics/compute to be idle before updating the resident 1772 * descriptors directly in memory, in case the GPU is using them. 1773 */ 1774 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | 1775 SI_CONTEXT_CS_PARTIAL_FLUSH; 1776 si_emit_cache_flush(sctx); 1777 1778 util_dynarray_foreach(&sctx->resident_tex_handles, 1779 struct si_texture_handle *, tex_handle) { 1780 unsigned desc_slot = (*tex_handle)->desc_slot; 1781 1782 if (!(*tex_handle)->desc_dirty) 1783 continue; 1784 1785 si_upload_bindless_descriptor(sctx, desc_slot, 16); 1786 (*tex_handle)->desc_dirty = false; 1787 } 1788 1789 util_dynarray_foreach(&sctx->resident_img_handles, 1790 struct si_image_handle *, img_handle) { 1791 unsigned desc_slot = (*img_handle)->desc_slot; 1792 1793 if (!(*img_handle)->desc_dirty) 1794 continue; 1795 1796 si_upload_bindless_descriptor(sctx, desc_slot, 8); 1797 (*img_handle)->desc_dirty = false; 1798 } 1799 1800 /* Invalidate L1 because it doesn't know that L2 changed. */ 1801 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1; 1802 si_emit_cache_flush(sctx); 1803 1804 sctx->bindless_descriptors_dirty = false; 1805 } 1806 1807 /* Update mutable image descriptor fields of all resident textures. */ 1808 static void si_update_bindless_texture_descriptor(struct si_context *sctx, 1809 struct si_texture_handle *tex_handle) 1810 { 1811 struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view; 1812 struct si_descriptors *desc = &sctx->bindless_descriptors; 1813 unsigned desc_slot_offset = tex_handle->desc_slot * 16; 1814 uint32_t desc_list[16]; 1815 1816 if (sview->base.texture->target == PIPE_BUFFER) 1817 return; 1818 1819 memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list)); 1820 si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate, 1821 desc->list + desc_slot_offset); 1822 1823 if (memcmp(desc_list, desc->list + desc_slot_offset, 1824 sizeof(desc_list))) { 1825 tex_handle->desc_dirty = true; 1826 sctx->bindless_descriptors_dirty = true; 1827 } 1828 } 1829 1830 static void si_update_bindless_image_descriptor(struct si_context *sctx, 1831 struct si_image_handle *img_handle) 1832 { 1833 struct si_descriptors *desc = &sctx->bindless_descriptors; 1834 unsigned desc_slot_offset = img_handle->desc_slot * 16; 1835 struct pipe_image_view *view = &img_handle->view; 1836 uint32_t desc_list[8]; 1837 1838 if (view->resource->target == PIPE_BUFFER) 1839 return; 1840 1841 memcpy(desc_list, desc->list + desc_slot_offset, 1842 sizeof(desc_list)); 1843 si_set_shader_image_desc(sctx, view, true, 1844 desc->list + desc_slot_offset); 1845 1846 if (memcmp(desc_list, desc->list + desc_slot_offset, 1847 sizeof(desc_list))) { 1848 img_handle->desc_dirty = true; 1849 sctx->bindless_descriptors_dirty = true; 1850 } 1851 } 1852 1853 static void si_update_all_resident_texture_descriptors(struct si_context *sctx) 1854 { 1855 util_dynarray_foreach(&sctx->resident_tex_handles, 1856 struct si_texture_handle *, tex_handle) { 1857 si_update_bindless_texture_descriptor(sctx, *tex_handle); 1858 } 1859 1860 util_dynarray_foreach(&sctx->resident_img_handles, 1861 struct si_image_handle *, img_handle) { 1862 si_update_bindless_image_descriptor(sctx, *img_handle); 1863 } 1864 1865 si_upload_bindless_descriptors(sctx); 1866 } 1867 1868 /* Update mutable image descriptor fields of all bound textures. */ 1869 void si_update_all_texture_descriptors(struct si_context *sctx) 1870 { 1871 unsigned shader; 1872 1873 for (shader = 0; shader < SI_NUM_SHADERS; shader++) { 1874 struct si_samplers *samplers = &sctx->samplers[shader]; 1875 struct si_images *images = &sctx->images[shader]; 1876 unsigned mask; 1877 1878 /* Images. */ 1879 mask = images->enabled_mask; 1880 while (mask) { 1881 unsigned i = u_bit_scan(&mask); 1882 struct pipe_image_view *view = &images->views[i]; 1883 1884 if (!view->resource || 1885 view->resource->target == PIPE_BUFFER) 1886 continue; 1887 1888 si_set_shader_image(sctx, shader, i, view, true); 1889 } 1890 1891 /* Sampler views. */ 1892 mask = samplers->enabled_mask; 1893 while (mask) { 1894 unsigned i = u_bit_scan(&mask); 1895 struct pipe_sampler_view *view = samplers->views[i]; 1896 1897 if (!view || 1898 !view->texture || 1899 view->texture->target == PIPE_BUFFER) 1900 continue; 1901 1902 si_set_sampler_view(sctx, shader, i, 1903 samplers->views[i], true); 1904 } 1905 1906 si_update_shader_needs_decompress_mask(sctx, shader); 1907 } 1908 1909 si_update_all_resident_texture_descriptors(sctx); 1910 } 1911 1912 /* SHADER USER DATA */ 1913 1914 static void si_mark_shader_pointers_dirty(struct si_context *sctx, 1915 unsigned shader) 1916 { 1917 sctx->shader_pointers_dirty |= 1918 u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, 1919 SI_NUM_SHADER_DESCS); 1920 1921 if (shader == PIPE_SHADER_VERTEX) 1922 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL; 1923 1924 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); 1925 } 1926 1927 static void si_shader_pointers_begin_new_cs(struct si_context *sctx) 1928 { 1929 sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS); 1930 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL; 1931 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); 1932 sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL; 1933 sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL; 1934 } 1935 1936 /* Set a base register address for user data constants in the given shader. 1937 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*. 1938 */ 1939 static void si_set_user_data_base(struct si_context *sctx, 1940 unsigned shader, uint32_t new_base) 1941 { 1942 uint32_t *base = &sctx->shader_pointers.sh_base[shader]; 1943 1944 if (*base != new_base) { 1945 *base = new_base; 1946 1947 if (new_base) { 1948 si_mark_shader_pointers_dirty(sctx, shader); 1949 1950 if (shader == PIPE_SHADER_VERTEX) 1951 sctx->last_vs_state = ~0; 1952 } 1953 } 1954 } 1955 1956 /* This must be called when these shaders are changed from non-NULL to NULL 1957 * and vice versa: 1958 * - geometry shader 1959 * - tessellation control shader 1960 * - tessellation evaluation shader 1961 */ 1962 void si_shader_change_notify(struct si_context *sctx) 1963 { 1964 /* VS can be bound as VS, ES, or LS. */ 1965 if (sctx->tes_shader.cso) { 1966 if (sctx->b.chip_class >= GFX9) { 1967 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 1968 R_00B430_SPI_SHADER_USER_DATA_LS_0); 1969 } else { 1970 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 1971 R_00B530_SPI_SHADER_USER_DATA_LS_0); 1972 } 1973 } else if (sctx->gs_shader.cso) { 1974 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 1975 R_00B330_SPI_SHADER_USER_DATA_ES_0); 1976 } else { 1977 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 1978 R_00B130_SPI_SHADER_USER_DATA_VS_0); 1979 } 1980 1981 /* TES can be bound as ES, VS, or not bound. */ 1982 if (sctx->tes_shader.cso) { 1983 if (sctx->gs_shader.cso) 1984 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 1985 R_00B330_SPI_SHADER_USER_DATA_ES_0); 1986 else 1987 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 1988 R_00B130_SPI_SHADER_USER_DATA_VS_0); 1989 } else { 1990 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0); 1991 } 1992 } 1993 1994 static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs, 1995 struct si_descriptors *desc, 1996 unsigned sh_base, 1997 unsigned pointer_count) 1998 { 1999 radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0)); 2000 radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2); 2001 } 2002 2003 static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs, 2004 struct si_descriptors *desc) 2005 { 2006 uint64_t va = desc->gpu_address; 2007 2008 radeon_emit(cs, va); 2009 radeon_emit(cs, va >> 32); 2010 } 2011 2012 static void si_emit_shader_pointer(struct si_context *sctx, 2013 struct si_descriptors *desc, 2014 unsigned sh_base) 2015 { 2016 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2017 2018 si_emit_shader_pointer_head(cs, desc, sh_base, 1); 2019 si_emit_shader_pointer_body(cs, desc); 2020 } 2021 2022 static void si_emit_consecutive_shader_pointers(struct si_context *sctx, 2023 unsigned pointer_mask, 2024 unsigned sh_base) 2025 { 2026 if (!sh_base) 2027 return; 2028 2029 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2030 unsigned mask = sctx->shader_pointers_dirty & pointer_mask; 2031 2032 while (mask) { 2033 int start, count; 2034 u_bit_scan_consecutive_range(&mask, &start, &count); 2035 2036 struct si_descriptors *descs = &sctx->descriptors[start]; 2037 2038 si_emit_shader_pointer_head(cs, descs, sh_base, count); 2039 for (int i = 0; i < count; i++) 2040 si_emit_shader_pointer_body(cs, descs + i); 2041 } 2042 } 2043 2044 static void si_emit_global_shader_pointers(struct si_context *sctx, 2045 struct si_descriptors *descs) 2046 { 2047 if (sctx->b.chip_class == GFX9) { 2048 /* Broadcast it to all shader stages. */ 2049 si_emit_shader_pointer(sctx, descs, 2050 R_00B530_SPI_SHADER_USER_DATA_COMMON_0); 2051 return; 2052 } 2053 2054 si_emit_shader_pointer(sctx, descs, 2055 R_00B030_SPI_SHADER_USER_DATA_PS_0); 2056 si_emit_shader_pointer(sctx, descs, 2057 R_00B130_SPI_SHADER_USER_DATA_VS_0); 2058 si_emit_shader_pointer(sctx, descs, 2059 R_00B330_SPI_SHADER_USER_DATA_ES_0); 2060 si_emit_shader_pointer(sctx, descs, 2061 R_00B230_SPI_SHADER_USER_DATA_GS_0); 2062 si_emit_shader_pointer(sctx, descs, 2063 R_00B430_SPI_SHADER_USER_DATA_HS_0); 2064 si_emit_shader_pointer(sctx, descs, 2065 R_00B530_SPI_SHADER_USER_DATA_LS_0); 2066 } 2067 2068 void si_emit_graphics_shader_pointers(struct si_context *sctx, 2069 struct r600_atom *atom) 2070 { 2071 uint32_t *sh_base = sctx->shader_pointers.sh_base; 2072 2073 if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) { 2074 si_emit_global_shader_pointers(sctx, 2075 &sctx->descriptors[SI_DESCS_RW_BUFFERS]); 2076 } 2077 2078 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX), 2079 sh_base[PIPE_SHADER_VERTEX]); 2080 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), 2081 sh_base[PIPE_SHADER_TESS_CTRL]); 2082 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL), 2083 sh_base[PIPE_SHADER_TESS_EVAL]); 2084 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), 2085 sh_base[PIPE_SHADER_GEOMETRY]); 2086 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT), 2087 sh_base[PIPE_SHADER_FRAGMENT]); 2088 2089 sctx->shader_pointers_dirty &= 2090 ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE); 2091 2092 if (sctx->vertex_buffer_pointer_dirty) { 2093 si_emit_shader_pointer(sctx, &sctx->vertex_buffers, 2094 sh_base[PIPE_SHADER_VERTEX]); 2095 sctx->vertex_buffer_pointer_dirty = false; 2096 } 2097 2098 if (sctx->graphics_bindless_pointer_dirty) { 2099 si_emit_global_shader_pointers(sctx, 2100 &sctx->bindless_descriptors); 2101 sctx->graphics_bindless_pointer_dirty = false; 2102 } 2103 } 2104 2105 void si_emit_compute_shader_pointers(struct si_context *sctx) 2106 { 2107 unsigned base = R_00B900_COMPUTE_USER_DATA_0; 2108 2109 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE), 2110 R_00B900_COMPUTE_USER_DATA_0); 2111 sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE); 2112 2113 if (sctx->compute_bindless_pointer_dirty) { 2114 si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, base); 2115 sctx->compute_bindless_pointer_dirty = false; 2116 } 2117 } 2118 2119 /* BINDLESS */ 2120 2121 static void si_init_bindless_descriptors(struct si_context *sctx, 2122 struct si_descriptors *desc, 2123 unsigned shader_userdata_index, 2124 unsigned num_elements) 2125 { 2126 MAYBE_UNUSED unsigned desc_slot; 2127 2128 si_init_descriptors(desc, shader_userdata_index, 16, num_elements); 2129 sctx->bindless_descriptors.num_active_slots = num_elements; 2130 2131 /* The first bindless descriptor is stored at slot 1, because 0 is not 2132 * considered to be a valid handle. 2133 */ 2134 sctx->num_bindless_descriptors = 1; 2135 2136 /* Track which bindless slots are used (or not). */ 2137 util_idalloc_init(&sctx->bindless_used_slots); 2138 util_idalloc_resize(&sctx->bindless_used_slots, num_elements); 2139 2140 /* Reserve slot 0 because it's an invalid handle for bindless. */ 2141 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots); 2142 assert(desc_slot == 0); 2143 } 2144 2145 static void si_release_bindless_descriptors(struct si_context *sctx) 2146 { 2147 si_release_descriptors(&sctx->bindless_descriptors); 2148 util_idalloc_fini(&sctx->bindless_used_slots); 2149 } 2150 2151 static unsigned si_get_first_free_bindless_slot(struct si_context *sctx) 2152 { 2153 struct si_descriptors *desc = &sctx->bindless_descriptors; 2154 unsigned desc_slot; 2155 2156 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots); 2157 if (desc_slot >= desc->num_elements) { 2158 /* The array of bindless descriptors is full, resize it. */ 2159 unsigned slot_size = desc->element_dw_size * 4; 2160 unsigned new_num_elements = desc->num_elements * 2; 2161 2162 desc->list = REALLOC(desc->list, desc->num_elements * slot_size, 2163 new_num_elements * slot_size); 2164 desc->num_elements = new_num_elements; 2165 desc->num_active_slots = new_num_elements; 2166 } 2167 2168 assert(desc_slot); 2169 return desc_slot; 2170 } 2171 2172 static unsigned 2173 si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list, 2174 unsigned size) 2175 { 2176 struct si_descriptors *desc = &sctx->bindless_descriptors; 2177 unsigned desc_slot, desc_slot_offset; 2178 2179 /* Find a free slot. */ 2180 desc_slot = si_get_first_free_bindless_slot(sctx); 2181 2182 /* For simplicity, sampler and image bindless descriptors use fixed 2183 * 16-dword slots for now. Image descriptors only need 8-dword but this 2184 * doesn't really matter because no real apps use image handles. 2185 */ 2186 desc_slot_offset = desc_slot * 16; 2187 2188 /* Copy the descriptor into the array. */ 2189 memcpy(desc->list + desc_slot_offset, desc_list, size); 2190 2191 /* Re-upload the whole array of bindless descriptors into a new buffer. 2192 */ 2193 if (!si_upload_descriptors(sctx, desc)) 2194 return 0; 2195 2196 /* Make sure to re-emit the shader pointers for all stages. */ 2197 sctx->graphics_bindless_pointer_dirty = true; 2198 sctx->compute_bindless_pointer_dirty = true; 2199 2200 return desc_slot; 2201 } 2202 2203 static void si_update_bindless_buffer_descriptor(struct si_context *sctx, 2204 unsigned desc_slot, 2205 struct pipe_resource *resource, 2206 uint64_t offset, 2207 bool *desc_dirty) 2208 { 2209 struct si_descriptors *desc = &sctx->bindless_descriptors; 2210 struct r600_resource *buf = r600_resource(resource); 2211 unsigned desc_slot_offset = desc_slot * 16; 2212 uint32_t *desc_list = desc->list + desc_slot_offset + 4; 2213 uint64_t old_desc_va; 2214 2215 assert(resource->target == PIPE_BUFFER); 2216 2217 /* Retrieve the old buffer addr from the descriptor. */ 2218 old_desc_va = si_desc_extract_buffer_address(desc_list); 2219 2220 if (old_desc_va != buf->gpu_address + offset) { 2221 /* The buffer has been invalidated when the handle wasn't 2222 * resident, update the descriptor and the dirty flag. 2223 */ 2224 si_set_buf_desc_address(buf, offset, &desc_list[0]); 2225 2226 *desc_dirty = true; 2227 } 2228 } 2229 2230 static uint64_t si_create_texture_handle(struct pipe_context *ctx, 2231 struct pipe_sampler_view *view, 2232 const struct pipe_sampler_state *state) 2233 { 2234 struct si_sampler_view *sview = (struct si_sampler_view *)view; 2235 struct si_context *sctx = (struct si_context *)ctx; 2236 struct si_texture_handle *tex_handle; 2237 struct si_sampler_state *sstate; 2238 uint32_t desc_list[16]; 2239 uint64_t handle; 2240 2241 tex_handle = CALLOC_STRUCT(si_texture_handle); 2242 if (!tex_handle) 2243 return 0; 2244 2245 memset(desc_list, 0, sizeof(desc_list)); 2246 si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor); 2247 2248 sstate = ctx->create_sampler_state(ctx, state); 2249 if (!sstate) { 2250 FREE(tex_handle); 2251 return 0; 2252 } 2253 2254 si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]); 2255 memcpy(&tex_handle->sstate, sstate, sizeof(*sstate)); 2256 ctx->delete_sampler_state(ctx, sstate); 2257 2258 tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, 2259 sizeof(desc_list)); 2260 if (!tex_handle->desc_slot) { 2261 FREE(tex_handle); 2262 return 0; 2263 } 2264 2265 handle = tex_handle->desc_slot; 2266 2267 if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle, 2268 tex_handle)) { 2269 FREE(tex_handle); 2270 return 0; 2271 } 2272 2273 pipe_sampler_view_reference(&tex_handle->view, view); 2274 2275 r600_resource(sview->base.texture)->texture_handle_allocated = true; 2276 2277 return handle; 2278 } 2279 2280 static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle) 2281 { 2282 struct si_context *sctx = (struct si_context *)ctx; 2283 struct si_texture_handle *tex_handle; 2284 struct hash_entry *entry; 2285 2286 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle); 2287 if (!entry) 2288 return; 2289 2290 tex_handle = (struct si_texture_handle *)entry->data; 2291 2292 /* Allow this descriptor slot to be re-used. */ 2293 util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot); 2294 2295 pipe_sampler_view_reference(&tex_handle->view, NULL); 2296 _mesa_hash_table_remove(sctx->tex_handles, entry); 2297 FREE(tex_handle); 2298 } 2299 2300 static void si_make_texture_handle_resident(struct pipe_context *ctx, 2301 uint64_t handle, bool resident) 2302 { 2303 struct si_context *sctx = (struct si_context *)ctx; 2304 struct si_texture_handle *tex_handle; 2305 struct si_sampler_view *sview; 2306 struct hash_entry *entry; 2307 2308 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle); 2309 if (!entry) 2310 return; 2311 2312 tex_handle = (struct si_texture_handle *)entry->data; 2313 sview = (struct si_sampler_view *)tex_handle->view; 2314 2315 if (resident) { 2316 if (sview->base.texture->target != PIPE_BUFFER) { 2317 struct r600_texture *rtex = 2318 (struct r600_texture *)sview->base.texture; 2319 2320 if (depth_needs_decompression(rtex)) { 2321 util_dynarray_append( 2322 &sctx->resident_tex_needs_depth_decompress, 2323 struct si_texture_handle *, 2324 tex_handle); 2325 } 2326 2327 if (color_needs_decompression(rtex)) { 2328 util_dynarray_append( 2329 &sctx->resident_tex_needs_color_decompress, 2330 struct si_texture_handle *, 2331 tex_handle); 2332 } 2333 2334 if (rtex->dcc_offset && 2335 p_atomic_read(&rtex->framebuffers_bound)) 2336 sctx->need_check_render_feedback = true; 2337 2338 si_update_bindless_texture_descriptor(sctx, tex_handle); 2339 } else { 2340 si_update_bindless_buffer_descriptor(sctx, 2341 tex_handle->desc_slot, 2342 sview->base.texture, 2343 sview->base.u.buf.offset, 2344 &tex_handle->desc_dirty); 2345 } 2346 2347 /* Re-upload the descriptor if it has been updated while it 2348 * wasn't resident. 2349 */ 2350 if (tex_handle->desc_dirty) 2351 sctx->bindless_descriptors_dirty = true; 2352 2353 /* Add the texture handle to the per-context list. */ 2354 util_dynarray_append(&sctx->resident_tex_handles, 2355 struct si_texture_handle *, tex_handle); 2356 2357 /* Add the buffers to the current CS in case si_begin_new_cs() 2358 * is not going to be called. 2359 */ 2360 si_sampler_view_add_buffer(sctx, sview->base.texture, 2361 RADEON_USAGE_READ, 2362 sview->is_stencil_sampler, false); 2363 } else { 2364 /* Remove the texture handle from the per-context list. */ 2365 util_dynarray_delete_unordered(&sctx->resident_tex_handles, 2366 struct si_texture_handle *, 2367 tex_handle); 2368 2369 if (sview->base.texture->target != PIPE_BUFFER) { 2370 util_dynarray_delete_unordered( 2371 &sctx->resident_tex_needs_depth_decompress, 2372 struct si_texture_handle *, tex_handle); 2373 2374 util_dynarray_delete_unordered( 2375 &sctx->resident_tex_needs_color_decompress, 2376 struct si_texture_handle *, tex_handle); 2377 } 2378 } 2379 } 2380 2381 static uint64_t si_create_image_handle(struct pipe_context *ctx, 2382 const struct pipe_image_view *view) 2383 { 2384 struct si_context *sctx = (struct si_context *)ctx; 2385 struct si_image_handle *img_handle; 2386 uint32_t desc_list[8]; 2387 uint64_t handle; 2388 2389 if (!view || !view->resource) 2390 return 0; 2391 2392 img_handle = CALLOC_STRUCT(si_image_handle); 2393 if (!img_handle) 2394 return 0; 2395 2396 memset(desc_list, 0, sizeof(desc_list)); 2397 si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor); 2398 2399 si_set_shader_image_desc(sctx, view, false, &desc_list[0]); 2400 2401 img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, 2402 sizeof(desc_list)); 2403 if (!img_handle->desc_slot) { 2404 FREE(img_handle); 2405 return 0; 2406 } 2407 2408 handle = img_handle->desc_slot; 2409 2410 if (!_mesa_hash_table_insert(sctx->img_handles, (void *)handle, 2411 img_handle)) { 2412 FREE(img_handle); 2413 return 0; 2414 } 2415 2416 util_copy_image_view(&img_handle->view, view); 2417 2418 r600_resource(view->resource)->image_handle_allocated = true; 2419 2420 return handle; 2421 } 2422 2423 static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle) 2424 { 2425 struct si_context *sctx = (struct si_context *)ctx; 2426 struct si_image_handle *img_handle; 2427 struct hash_entry *entry; 2428 2429 entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle); 2430 if (!entry) 2431 return; 2432 2433 img_handle = (struct si_image_handle *)entry->data; 2434 2435 util_copy_image_view(&img_handle->view, NULL); 2436 _mesa_hash_table_remove(sctx->img_handles, entry); 2437 FREE(img_handle); 2438 } 2439 2440 static void si_make_image_handle_resident(struct pipe_context *ctx, 2441 uint64_t handle, unsigned access, 2442 bool resident) 2443 { 2444 struct si_context *sctx = (struct si_context *)ctx; 2445 struct si_image_handle *img_handle; 2446 struct pipe_image_view *view; 2447 struct r600_resource *res; 2448 struct hash_entry *entry; 2449 2450 entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle); 2451 if (!entry) 2452 return; 2453 2454 img_handle = (struct si_image_handle *)entry->data; 2455 view = &img_handle->view; 2456 res = (struct r600_resource *)view->resource; 2457 2458 if (resident) { 2459 if (res->b.b.target != PIPE_BUFFER) { 2460 struct r600_texture *rtex = (struct r600_texture *)res; 2461 unsigned level = view->u.tex.level; 2462 2463 if (color_needs_decompression(rtex)) { 2464 util_dynarray_append( 2465 &sctx->resident_img_needs_color_decompress, 2466 struct si_image_handle *, 2467 img_handle); 2468 } 2469 2470 if (vi_dcc_enabled(rtex, level) && 2471 p_atomic_read(&rtex->framebuffers_bound)) 2472 sctx->need_check_render_feedback = true; 2473 2474 si_update_bindless_image_descriptor(sctx, img_handle); 2475 } else { 2476 si_update_bindless_buffer_descriptor(sctx, 2477 img_handle->desc_slot, 2478 view->resource, 2479 view->u.buf.offset, 2480 &img_handle->desc_dirty); 2481 } 2482 2483 /* Re-upload the descriptor if it has been updated while it 2484 * wasn't resident. 2485 */ 2486 if (img_handle->desc_dirty) 2487 sctx->bindless_descriptors_dirty = true; 2488 2489 /* Add the image handle to the per-context list. */ 2490 util_dynarray_append(&sctx->resident_img_handles, 2491 struct si_image_handle *, img_handle); 2492 2493 /* Add the buffers to the current CS in case si_begin_new_cs() 2494 * is not going to be called. 2495 */ 2496 si_sampler_view_add_buffer(sctx, view->resource, 2497 (access & PIPE_IMAGE_ACCESS_WRITE) ? 2498 RADEON_USAGE_READWRITE : 2499 RADEON_USAGE_READ, false, false); 2500 } else { 2501 /* Remove the image handle from the per-context list. */ 2502 util_dynarray_delete_unordered(&sctx->resident_img_handles, 2503 struct si_image_handle *, 2504 img_handle); 2505 2506 if (res->b.b.target != PIPE_BUFFER) { 2507 util_dynarray_delete_unordered( 2508 &sctx->resident_img_needs_color_decompress, 2509 struct si_image_handle *, 2510 img_handle); 2511 } 2512 } 2513 } 2514 2515 2516 void si_all_resident_buffers_begin_new_cs(struct si_context *sctx) 2517 { 2518 unsigned num_resident_tex_handles, num_resident_img_handles; 2519 2520 num_resident_tex_handles = sctx->resident_tex_handles.size / 2521 sizeof(struct si_texture_handle *); 2522 num_resident_img_handles = sctx->resident_img_handles.size / 2523 sizeof(struct si_image_handle *); 2524 2525 /* Add all resident texture handles. */ 2526 util_dynarray_foreach(&sctx->resident_tex_handles, 2527 struct si_texture_handle *, tex_handle) { 2528 struct si_sampler_view *sview = 2529 (struct si_sampler_view *)(*tex_handle)->view; 2530 2531 si_sampler_view_add_buffer(sctx, sview->base.texture, 2532 RADEON_USAGE_READ, 2533 sview->is_stencil_sampler, false); 2534 } 2535 2536 /* Add all resident image handles. */ 2537 util_dynarray_foreach(&sctx->resident_img_handles, 2538 struct si_image_handle *, img_handle) { 2539 struct pipe_image_view *view = &(*img_handle)->view; 2540 2541 si_sampler_view_add_buffer(sctx, view->resource, 2542 RADEON_USAGE_READWRITE, 2543 false, false); 2544 } 2545 2546 sctx->b.num_resident_handles += num_resident_tex_handles + 2547 num_resident_img_handles; 2548 } 2549 2550 /* INIT/DEINIT/UPLOAD */ 2551 2552 void si_init_all_descriptors(struct si_context *sctx) 2553 { 2554 int i; 2555 2556 STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0); 2557 STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0); 2558 2559 for (i = 0; i < SI_NUM_SHADERS; i++) { 2560 bool gfx9_tcs = false; 2561 bool gfx9_gs = false; 2562 unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS; 2563 unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS; 2564 struct si_descriptors *desc; 2565 2566 if (sctx->b.chip_class >= GFX9) { 2567 gfx9_tcs = i == PIPE_SHADER_TESS_CTRL; 2568 gfx9_gs = i == PIPE_SHADER_GEOMETRY; 2569 } 2570 2571 desc = si_const_and_shader_buffer_descriptors(sctx, i); 2572 si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc, 2573 num_buffer_slots, 2574 gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS : 2575 gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS : 2576 SI_SGPR_CONST_AND_SHADER_BUFFERS, 2577 RADEON_USAGE_READWRITE, 2578 RADEON_USAGE_READ, 2579 RADEON_PRIO_SHADER_RW_BUFFER, 2580 RADEON_PRIO_CONST_BUFFER); 2581 desc->slot_index_to_bind_directly = si_get_constbuf_slot(0); 2582 2583 desc = si_sampler_and_image_descriptors(sctx, i); 2584 si_init_descriptors(desc, 2585 gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES : 2586 gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES : 2587 SI_SGPR_SAMPLERS_AND_IMAGES, 2588 16, num_sampler_slots); 2589 2590 int j; 2591 for (j = 0; j < SI_NUM_IMAGES; j++) 2592 memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4); 2593 for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++) 2594 memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4); 2595 } 2596 2597 si_init_buffer_resources(&sctx->rw_buffers, 2598 &sctx->descriptors[SI_DESCS_RW_BUFFERS], 2599 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS, 2600 /* The second set of usage/priority is used by 2601 * const buffers in RW buffer slots. */ 2602 RADEON_USAGE_READWRITE, RADEON_USAGE_READ, 2603 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER); 2604 sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS; 2605 2606 si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS, 2607 4, SI_NUM_VERTEX_BUFFERS); 2608 FREE(sctx->vertex_buffers.list); /* not used */ 2609 sctx->vertex_buffers.list = NULL; 2610 2611 /* Initialize an array of 1024 bindless descriptors, when the limit is 2612 * reached, just make it larger and re-upload the whole array. 2613 */ 2614 si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors, 2615 SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, 2616 1024); 2617 2618 sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS); 2619 2620 /* Set pipe_context functions. */ 2621 sctx->b.b.bind_sampler_states = si_bind_sampler_states; 2622 sctx->b.b.set_shader_images = si_set_shader_images; 2623 sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer; 2624 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; 2625 sctx->b.b.set_shader_buffers = si_set_shader_buffers; 2626 sctx->b.b.set_sampler_views = si_set_sampler_views; 2627 sctx->b.b.create_texture_handle = si_create_texture_handle; 2628 sctx->b.b.delete_texture_handle = si_delete_texture_handle; 2629 sctx->b.b.make_texture_handle_resident = si_make_texture_handle_resident; 2630 sctx->b.b.create_image_handle = si_create_image_handle; 2631 sctx->b.b.delete_image_handle = si_delete_image_handle; 2632 sctx->b.b.make_image_handle_resident = si_make_image_handle_resident; 2633 sctx->b.invalidate_buffer = si_invalidate_buffer; 2634 sctx->b.rebind_buffer = si_rebind_buffer; 2635 2636 /* Shader user data. */ 2637 si_init_atom(sctx, &sctx->shader_pointers.atom, &sctx->atoms.s.shader_pointers, 2638 si_emit_graphics_shader_pointers); 2639 2640 /* Set default and immutable mappings. */ 2641 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0); 2642 2643 if (sctx->b.chip_class >= GFX9) { 2644 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, 2645 R_00B430_SPI_SHADER_USER_DATA_LS_0); 2646 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, 2647 R_00B330_SPI_SHADER_USER_DATA_ES_0); 2648 } else { 2649 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, 2650 R_00B430_SPI_SHADER_USER_DATA_HS_0); 2651 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, 2652 R_00B230_SPI_SHADER_USER_DATA_GS_0); 2653 } 2654 si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0); 2655 } 2656 2657 static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask) 2658 { 2659 unsigned dirty = sctx->descriptors_dirty & mask; 2660 2661 /* Assume nothing will go wrong: */ 2662 sctx->shader_pointers_dirty |= dirty; 2663 2664 while (dirty) { 2665 unsigned i = u_bit_scan(&dirty); 2666 2667 if (!si_upload_descriptors(sctx, &sctx->descriptors[i])) 2668 return false; 2669 } 2670 2671 sctx->descriptors_dirty &= ~mask; 2672 2673 si_upload_bindless_descriptors(sctx); 2674 2675 return true; 2676 } 2677 2678 bool si_upload_graphics_shader_descriptors(struct si_context *sctx) 2679 { 2680 const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE); 2681 return si_upload_shader_descriptors(sctx, mask); 2682 } 2683 2684 bool si_upload_compute_shader_descriptors(struct si_context *sctx) 2685 { 2686 /* Does not update rw_buffers as that is not needed for compute shaders 2687 * and the input buffer is using the same SGPR's anyway. 2688 */ 2689 const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, 2690 SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE); 2691 return si_upload_shader_descriptors(sctx, mask); 2692 } 2693 2694 void si_release_all_descriptors(struct si_context *sctx) 2695 { 2696 int i; 2697 2698 for (i = 0; i < SI_NUM_SHADERS; i++) { 2699 si_release_buffer_resources(&sctx->const_and_shader_buffers[i], 2700 si_const_and_shader_buffer_descriptors(sctx, i)); 2701 si_release_sampler_views(&sctx->samplers[i]); 2702 si_release_image_views(&sctx->images[i]); 2703 } 2704 si_release_buffer_resources(&sctx->rw_buffers, 2705 &sctx->descriptors[SI_DESCS_RW_BUFFERS]); 2706 for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++) 2707 pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]); 2708 2709 for (i = 0; i < SI_NUM_DESCS; ++i) 2710 si_release_descriptors(&sctx->descriptors[i]); 2711 2712 sctx->vertex_buffers.list = NULL; /* points into a mapped buffer */ 2713 si_release_descriptors(&sctx->vertex_buffers); 2714 si_release_bindless_descriptors(sctx); 2715 } 2716 2717 void si_all_descriptors_begin_new_cs(struct si_context *sctx) 2718 { 2719 int i; 2720 2721 for (i = 0; i < SI_NUM_SHADERS; i++) { 2722 si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]); 2723 si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]); 2724 si_image_views_begin_new_cs(sctx, &sctx->images[i]); 2725 } 2726 si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers); 2727 si_vertex_buffers_begin_new_cs(sctx); 2728 2729 for (i = 0; i < SI_NUM_DESCS; ++i) 2730 si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]); 2731 si_descriptors_begin_new_cs(sctx, &sctx->bindless_descriptors); 2732 2733 si_shader_pointers_begin_new_cs(sctx); 2734 } 2735 2736 void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, 2737 uint64_t new_active_mask) 2738 { 2739 struct si_descriptors *desc = &sctx->descriptors[desc_idx]; 2740 2741 /* Ignore no-op updates and updates that disable all slots. */ 2742 if (!new_active_mask || 2743 new_active_mask == u_bit_consecutive64(desc->first_active_slot, 2744 desc->num_active_slots)) 2745 return; 2746 2747 int first, count; 2748 u_bit_scan_consecutive_range64(&new_active_mask, &first, &count); 2749 assert(new_active_mask == 0); 2750 2751 /* Upload/dump descriptors if slots are being enabled. */ 2752 if (first < desc->first_active_slot || 2753 first + count > desc->first_active_slot + desc->num_active_slots) 2754 sctx->descriptors_dirty |= 1u << desc_idx; 2755 2756 desc->first_active_slot = first; 2757 desc->num_active_slots = count; 2758 } 2759 2760 void si_set_active_descriptors_for_shader(struct si_context *sctx, 2761 struct si_shader_selector *sel) 2762 { 2763 if (!sel) 2764 return; 2765 2766 si_set_active_descriptors(sctx, 2767 si_const_and_shader_buffer_descriptors_idx(sel->type), 2768 sel->active_const_and_shader_buffers); 2769 si_set_active_descriptors(sctx, 2770 si_sampler_and_image_descriptors_idx(sel->type), 2771 sel->active_samplers_and_images); 2772 } 2773