/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Marek Olšák <marek.olsak@amd.com>
 */

/* Resource binding slots and sampler states (each described with 8 or
 * 4 dwords) are stored in lists in memory which is accessed by shaders
 * using scalar load instructions.
 *
 * This file is responsible for managing such lists. It keeps a copy of all
 * descriptors in CPU memory and re-uploads a whole list if some slots have
 * been changed.
 *
 * This code is also responsible for updating shader pointers to those lists.
 *
 * Note that CP DMA can't be used for updating the lists, because a GPU hang
 * could leave the list in a mid-IB state and the next IB would get wrong
 * descriptors and the whole context would be unusable at that point.
 * (Note: Register shadowing can't be used for the same reason.)
 *
 * Also, uploading descriptors to newly allocated memory doesn't require
 * a KCACHE flush.
 *
 *
 * Possible scenarios for one 16 dword image+sampler slot:
 *
 *         | Image        | w/ FMASK   | Buffer       | NULL
 * [ 0: 3]   Image[0:3]   | Image[0:3] | Null[0:3]    | Null[0:3]
 * [ 4: 7]   Image[4:7]   | Image[4:7] | Buffer[0:3]  | 0
 * [ 8:11]   Null[0:3]    | Fmask[0:3] | Null[0:3]    | Null[0:3]
 * [12:15]   Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
 *
 * FMASK implies MSAA, therefore no sampler state.
 * Sampler states are never unbound except when FMASK is bound.
 */

#include "radeon/r600_cs.h"
#include "si_pipe.h"
#include "sid.h"

#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"


/* NULL image and buffer descriptor for textures (alpha = 1) and images
 * (alpha = 0).
 *
 * For images, all fields must be zero except for the swizzle, which
 * supports arbitrary combinations of 0s and 1s. The texture type must be
 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
 *
 * For buffers, all fields must be zero. If they are not, the hw hangs.
 *
 * This is the only reason why the buffer descriptor must be in words [4:7].
 */
static uint32_t null_texture_descriptor[8] = {
	0,
	0,
	0,
	S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) |
	S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
	/* the rest must contain zeros, which is also used by the buffer
	 * descriptor */
};

static uint32_t null_image_descriptor[8] = {
	0,
	0,
	0,
	S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
	/* the rest must contain zeros, which is also used by the buffer
	 * descriptor */
};

static void si_init_descriptors(struct si_descriptors *desc,
				unsigned shader_userdata_index,
				unsigned element_dw_size,
				unsigned num_elements,
				const uint32_t *null_descriptor,
				unsigned *ce_offset)
{
	int i;

	assert(num_elements <= sizeof(desc->dirty_mask)*8);

	desc->list = CALLOC(num_elements, element_dw_size * 4);
	desc->element_dw_size = element_dw_size;
	desc->num_elements = num_elements;
	desc->dirty_mask = num_elements == 32 ? ~0u : (1u << num_elements) - 1;
	desc->shader_userdata_offset = shader_userdata_index * 4;

	if (ce_offset) {
		desc->ce_offset = *ce_offset;

		/* make sure that ce_offset stays 32 byte aligned */
		*ce_offset += align(element_dw_size * num_elements * 4, 32);
	}

	/* Initialize the array to NULL descriptors if the element size is 8. */
	if (null_descriptor) {
		assert(element_dw_size % 8 == 0);
		for (i = 0; i < num_elements * element_dw_size / 8; i++)
			memcpy(desc->list + i * 8, null_descriptor,
			       8 * 4);
	}
}

static void si_release_descriptors(struct si_descriptors *desc)
{
	r600_resource_reference(&desc->buffer, NULL);
	FREE(desc->list);
}

static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned size,
			 unsigned *out_offset, struct r600_resource **out_buf) {
	uint64_t va;

	u_suballocator_alloc(sctx->ce_suballocator, size, 64, out_offset,
			     (struct pipe_resource**)out_buf);
	/* A NULL *out_buf means the suballocation failed. */
	if (!*out_buf)
		return false;

	va = (*out_buf)->gpu_address + *out_offset;

	radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0));
	radeon_emit(sctx->ce_ib, ce_offset);
	radeon_emit(sctx->ce_ib, size / 4);
	radeon_emit(sctx->ce_ib, va);
	radeon_emit(sctx->ce_ib, va >> 32);

	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, *out_buf,
				  RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);

	sctx->ce_need_synchronization = true;
	return true;
}

static void si_ce_reinitialize_descriptors(struct si_context *sctx,
					   struct si_descriptors *desc)
{
	if (desc->buffer) {
		struct r600_resource *buffer = (struct r600_resource*)desc->buffer;
		unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
		uint64_t va = buffer->gpu_address + desc->buffer_offset;
		struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;

		if (!ib)
			ib = sctx->ce_ib;

		list_size = align(list_size, 32);

		radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
		radeon_emit(ib, va);
		radeon_emit(ib, va >> 32);
		radeon_emit(ib, list_size / 4);
		radeon_emit(ib, desc->ce_offset);

		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
					  RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
	}
	desc->ce_ram_dirty = false;
}

void si_ce_reinitialize_all_descriptors(struct si_context *sctx)
{
	int i;

	for (i = 0; i < SI_NUM_DESCS; ++i)
		si_ce_reinitialize_descriptors(sctx, &sctx->descriptors[i]);
}

void si_ce_enable_loads(struct radeon_winsys_cs *ib)
{
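	/* CONTEXT_CONTROL enables loading of CE RAM contents and shadowed
	 * register state for this IB; this appears to be what makes the
	 * constant-engine descriptor path above usable by draws. */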
196 radeon_emit(ib, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 197 radeon_emit(ib, CONTEXT_CONTROL_LOAD_ENABLE(1) | 198 CONTEXT_CONTROL_LOAD_CE_RAM(1)); 199 radeon_emit(ib, CONTEXT_CONTROL_SHADOW_ENABLE(1)); 200 } 201 202 static bool si_upload_descriptors(struct si_context *sctx, 203 struct si_descriptors *desc, 204 struct r600_atom * atom) 205 { 206 unsigned list_size = desc->num_elements * desc->element_dw_size * 4; 207 208 if (!desc->dirty_mask) 209 return true; 210 211 if (sctx->ce_ib) { 212 uint32_t const* list = (uint32_t const*)desc->list; 213 214 if (desc->ce_ram_dirty) 215 si_ce_reinitialize_descriptors(sctx, desc); 216 217 while(desc->dirty_mask) { 218 int begin, count; 219 u_bit_scan_consecutive_range(&desc->dirty_mask, &begin, 220 &count); 221 222 begin *= desc->element_dw_size; 223 count *= desc->element_dw_size; 224 225 radeon_emit(sctx->ce_ib, 226 PKT3(PKT3_WRITE_CONST_RAM, count, 0)); 227 radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4); 228 radeon_emit_array(sctx->ce_ib, list + begin, count); 229 } 230 231 if (!si_ce_upload(sctx, desc->ce_offset, list_size, 232 &desc->buffer_offset, &desc->buffer)) 233 return false; 234 } else { 235 void *ptr; 236 237 u_upload_alloc(sctx->b.uploader, 0, list_size, 256, 238 &desc->buffer_offset, 239 (struct pipe_resource**)&desc->buffer, &ptr); 240 if (!desc->buffer) 241 return false; /* skip the draw call */ 242 243 util_memcpy_cpu_to_le32(ptr, desc->list, list_size); 244 desc->gpu_list = ptr; 245 246 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, 247 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); 248 } 249 desc->dirty_mask = 0; 250 251 if (atom) 252 si_mark_atom_dirty(sctx, atom); 253 254 return true; 255 } 256 257 static void 258 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc) 259 { 260 desc->ce_ram_dirty = true; 261 262 if (!desc->buffer) 263 return; 264 265 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, 266 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); 267 } 268 269 /* SAMPLER VIEWS */ 270 271 static unsigned 272 si_sampler_descriptors_idx(unsigned shader) 273 { 274 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + 275 SI_SHADER_DESCS_SAMPLERS; 276 } 277 278 static struct si_descriptors * 279 si_sampler_descriptors(struct si_context *sctx, unsigned shader) 280 { 281 return &sctx->descriptors[si_sampler_descriptors_idx(shader)]; 282 } 283 284 static void si_release_sampler_views(struct si_sampler_views *views) 285 { 286 int i; 287 288 for (i = 0; i < ARRAY_SIZE(views->views); i++) { 289 pipe_sampler_view_reference(&views->views[i], NULL); 290 } 291 } 292 293 static void si_sampler_view_add_buffer(struct si_context *sctx, 294 struct pipe_resource *resource, 295 enum radeon_bo_usage usage, 296 bool is_stencil_sampler, 297 bool check_mem) 298 { 299 struct r600_resource *rres; 300 struct r600_texture *rtex; 301 enum radeon_bo_priority priority; 302 303 if (!resource) 304 return; 305 306 if (resource->target != PIPE_BUFFER) { 307 struct r600_texture *tex = (struct r600_texture*)resource; 308 309 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil_sampler)) 310 resource = &tex->flushed_depth_texture->resource.b.b; 311 } 312 313 rres = (struct r600_resource*)resource; 314 priority = r600_get_sampler_view_priority(rres); 315 316 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 317 rres, usage, priority, 318 check_mem); 319 320 if (resource->target == PIPE_BUFFER) 321 return; 322 323 /* Now add separate DCC or HTILE. 
*/ 324 rtex = (struct r600_texture*)resource; 325 if (rtex->dcc_separate_buffer) { 326 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 327 rtex->dcc_separate_buffer, usage, 328 RADEON_PRIO_DCC, check_mem); 329 } 330 331 if (rtex->htile_buffer && 332 rtex->tc_compatible_htile && 333 !is_stencil_sampler) { 334 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 335 rtex->htile_buffer, usage, 336 RADEON_PRIO_HTILE, check_mem); 337 } 338 } 339 340 static void si_sampler_views_begin_new_cs(struct si_context *sctx, 341 struct si_sampler_views *views) 342 { 343 unsigned mask = views->enabled_mask; 344 345 /* Add buffers to the CS. */ 346 while (mask) { 347 int i = u_bit_scan(&mask); 348 struct si_sampler_view *sview = (struct si_sampler_view *)views->views[i]; 349 350 si_sampler_view_add_buffer(sctx, sview->base.texture, 351 RADEON_USAGE_READ, 352 sview->is_stencil_sampler, false); 353 } 354 } 355 356 /* Set buffer descriptor fields that can be changed by reallocations. */ 357 static void si_set_buf_desc_address(struct r600_resource *buf, 358 uint64_t offset, uint32_t *state) 359 { 360 uint64_t va = buf->gpu_address + offset; 361 362 state[0] = va; 363 state[1] &= C_008F04_BASE_ADDRESS_HI; 364 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32); 365 } 366 367 /* Set texture descriptor fields that can be changed by reallocations. 368 * 369 * \param tex texture 370 * \param base_level_info information of the level of BASE_ADDRESS 371 * \param base_level the level of BASE_ADDRESS 372 * \param first_level pipe_sampler_view.u.tex.first_level 373 * \param block_width util_format_get_blockwidth() 374 * \param is_stencil select between separate Z & Stencil 375 * \param state descriptor to update 376 */ 377 void si_set_mutable_tex_desc_fields(struct r600_texture *tex, 378 const struct radeon_surf_level *base_level_info, 379 unsigned base_level, unsigned first_level, 380 unsigned block_width, bool is_stencil, 381 uint32_t *state) 382 { 383 uint64_t va; 384 unsigned pitch = base_level_info->nblk_x * block_width; 385 386 if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil)) { 387 tex = tex->flushed_depth_texture; 388 is_stencil = false; 389 } 390 391 va = tex->resource.gpu_address + base_level_info->offset; 392 393 state[1] &= C_008F14_BASE_ADDRESS_HI; 394 state[3] &= C_008F1C_TILING_INDEX; 395 state[4] &= C_008F20_PITCH; 396 state[6] &= C_008F28_COMPRESSION_EN; 397 398 state[0] = va >> 8; 399 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); 400 state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level, 401 is_stencil)); 402 state[4] |= S_008F20_PITCH(pitch - 1); 403 404 if (tex->dcc_offset && first_level < tex->surface.num_dcc_levels) { 405 state[6] |= S_008F28_COMPRESSION_EN(1); 406 state[7] = ((!tex->dcc_separate_buffer ? 
tex->resource.gpu_address : 0) + 407 tex->dcc_offset + 408 base_level_info->dcc_offset) >> 8; 409 } else if (tex->tc_compatible_htile) { 410 state[6] |= S_008F28_COMPRESSION_EN(1); 411 state[7] = tex->htile_buffer->gpu_address >> 8; 412 } 413 } 414 415 static void si_set_sampler_view(struct si_context *sctx, 416 unsigned shader, 417 unsigned slot, struct pipe_sampler_view *view, 418 bool disallow_early_out) 419 { 420 struct si_sampler_views *views = &sctx->samplers[shader].views; 421 struct si_sampler_view *rview = (struct si_sampler_view*)view; 422 struct si_descriptors *descs = si_sampler_descriptors(sctx, shader); 423 uint32_t *desc = descs->list + slot * 16; 424 425 if (views->views[slot] == view && !disallow_early_out) 426 return; 427 428 if (view) { 429 struct r600_texture *rtex = (struct r600_texture *)view->texture; 430 431 assert(rtex); /* views with texture == NULL aren't supported */ 432 pipe_sampler_view_reference(&views->views[slot], view); 433 memcpy(desc, rview->state, 8*4); 434 435 if (rtex->resource.b.b.target == PIPE_BUFFER) { 436 rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW; 437 438 si_set_buf_desc_address(&rtex->resource, 439 view->u.buf.offset, 440 desc + 4); 441 } else { 442 bool is_separate_stencil = 443 rtex->db_compatible && 444 rview->is_stencil_sampler; 445 446 si_set_mutable_tex_desc_fields(rtex, 447 rview->base_level_info, 448 rview->base_level, 449 rview->base.u.tex.first_level, 450 rview->block_width, 451 is_separate_stencil, 452 desc); 453 } 454 455 if (rtex->resource.b.b.target != PIPE_BUFFER && 456 rtex->fmask.size) { 457 memcpy(desc + 8, 458 rview->fmask_state, 8*4); 459 } else { 460 /* Disable FMASK and bind sampler state in [12:15]. */ 461 memcpy(desc + 8, 462 null_texture_descriptor, 4*4); 463 464 if (views->sampler_states[slot]) 465 memcpy(desc + 12, 466 views->sampler_states[slot]->val, 4*4); 467 } 468 469 views->enabled_mask |= 1u << slot; 470 471 /* Since this can flush, it must be done after enabled_mask is 472 * updated. */ 473 si_sampler_view_add_buffer(sctx, view->texture, 474 RADEON_USAGE_READ, 475 rview->is_stencil_sampler, true); 476 } else { 477 pipe_sampler_view_reference(&views->views[slot], NULL); 478 memcpy(desc, null_texture_descriptor, 8*4); 479 /* Only clear the lower dwords of FMASK. */ 480 memcpy(desc + 8, null_texture_descriptor, 4*4); 481 /* Re-set the sampler state if we are transitioning from FMASK. 
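		 * Binding FMASK re-uses dwords [12:15] of the slot (see the
		 * layout table at the top of this file), so the sampler state
		 * has to be written back here.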
*/ 482 if (views->sampler_states[slot]) 483 memcpy(desc + 12, 484 views->sampler_states[slot]->val, 4*4); 485 486 views->enabled_mask &= ~(1u << slot); 487 } 488 489 descs->dirty_mask |= 1u << slot; 490 sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader); 491 } 492 493 static bool is_compressed_colortex(struct r600_texture *rtex) 494 { 495 return rtex->cmask.size || rtex->fmask.size || 496 (rtex->dcc_offset && rtex->dirty_level_mask); 497 } 498 499 static void si_update_compressed_tex_shader_mask(struct si_context *sctx, 500 unsigned shader) 501 { 502 struct si_textures_info *samplers = &sctx->samplers[shader]; 503 unsigned shader_bit = 1 << shader; 504 505 if (samplers->depth_texture_mask || 506 samplers->compressed_colortex_mask || 507 sctx->images[shader].compressed_colortex_mask) 508 sctx->compressed_tex_shader_mask |= shader_bit; 509 else 510 sctx->compressed_tex_shader_mask &= ~shader_bit; 511 } 512 513 static void si_set_sampler_views(struct pipe_context *ctx, 514 enum pipe_shader_type shader, unsigned start, 515 unsigned count, 516 struct pipe_sampler_view **views) 517 { 518 struct si_context *sctx = (struct si_context *)ctx; 519 struct si_textures_info *samplers = &sctx->samplers[shader]; 520 int i; 521 522 if (!count || shader >= SI_NUM_SHADERS) 523 return; 524 525 for (i = 0; i < count; i++) { 526 unsigned slot = start + i; 527 528 if (!views || !views[i]) { 529 samplers->depth_texture_mask &= ~(1u << slot); 530 samplers->compressed_colortex_mask &= ~(1u << slot); 531 si_set_sampler_view(sctx, shader, slot, NULL, false); 532 continue; 533 } 534 535 si_set_sampler_view(sctx, shader, slot, views[i], false); 536 537 if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) { 538 struct r600_texture *rtex = 539 (struct r600_texture*)views[i]->texture; 540 struct si_sampler_view *rview = (struct si_sampler_view *)views[i]; 541 542 if (rtex->db_compatible && 543 (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) { 544 samplers->depth_texture_mask |= 1u << slot; 545 } else { 546 samplers->depth_texture_mask &= ~(1u << slot); 547 } 548 if (is_compressed_colortex(rtex)) { 549 samplers->compressed_colortex_mask |= 1u << slot; 550 } else { 551 samplers->compressed_colortex_mask &= ~(1u << slot); 552 } 553 554 if (rtex->dcc_offset && 555 p_atomic_read(&rtex->framebuffers_bound)) 556 sctx->need_check_render_feedback = true; 557 } else { 558 samplers->depth_texture_mask &= ~(1u << slot); 559 samplers->compressed_colortex_mask &= ~(1u << slot); 560 } 561 } 562 563 si_update_compressed_tex_shader_mask(sctx, shader); 564 } 565 566 static void 567 si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers) 568 { 569 unsigned mask = samplers->views.enabled_mask; 570 571 while (mask) { 572 int i = u_bit_scan(&mask); 573 struct pipe_resource *res = samplers->views.views[i]->texture; 574 575 if (res && res->target != PIPE_BUFFER) { 576 struct r600_texture *rtex = (struct r600_texture *)res; 577 578 if (is_compressed_colortex(rtex)) { 579 samplers->compressed_colortex_mask |= 1u << i; 580 } else { 581 samplers->compressed_colortex_mask &= ~(1u << i); 582 } 583 } 584 } 585 } 586 587 /* IMAGE VIEWS */ 588 589 static unsigned 590 si_image_descriptors_idx(unsigned shader) 591 { 592 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + 593 SI_SHADER_DESCS_IMAGES; 594 } 595 596 static struct si_descriptors* 597 si_image_descriptors(struct si_context *sctx, unsigned shader) 598 { 599 return &sctx->descriptors[si_image_descriptors_idx(shader)]; 600 } 
601 602 static void 603 si_release_image_views(struct si_images_info *images) 604 { 605 unsigned i; 606 607 for (i = 0; i < SI_NUM_IMAGES; ++i) { 608 struct pipe_image_view *view = &images->views[i]; 609 610 pipe_resource_reference(&view->resource, NULL); 611 } 612 } 613 614 static void 615 si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *images) 616 { 617 uint mask = images->enabled_mask; 618 619 /* Add buffers to the CS. */ 620 while (mask) { 621 int i = u_bit_scan(&mask); 622 struct pipe_image_view *view = &images->views[i]; 623 624 assert(view->resource); 625 626 si_sampler_view_add_buffer(sctx, view->resource, 627 RADEON_USAGE_READWRITE, false, false); 628 } 629 } 630 631 static void 632 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot) 633 { 634 struct si_images_info *images = &ctx->images[shader]; 635 636 if (images->enabled_mask & (1u << slot)) { 637 struct si_descriptors *descs = si_image_descriptors(ctx, shader); 638 639 pipe_resource_reference(&images->views[slot].resource, NULL); 640 images->compressed_colortex_mask &= ~(1 << slot); 641 642 memcpy(descs->list + slot*8, null_image_descriptor, 8*4); 643 images->enabled_mask &= ~(1u << slot); 644 descs->dirty_mask |= 1u << slot; 645 ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader); 646 } 647 } 648 649 static void 650 si_mark_image_range_valid(const struct pipe_image_view *view) 651 { 652 struct r600_resource *res = (struct r600_resource *)view->resource; 653 654 assert(res && res->b.b.target == PIPE_BUFFER); 655 656 util_range_add(&res->valid_buffer_range, 657 view->u.buf.offset, 658 view->u.buf.offset + view->u.buf.size); 659 } 660 661 static void si_set_shader_image(struct si_context *ctx, 662 unsigned shader, 663 unsigned slot, const struct pipe_image_view *view, 664 bool skip_decompress) 665 { 666 struct si_screen *screen = ctx->screen; 667 struct si_images_info *images = &ctx->images[shader]; 668 struct si_descriptors *descs = si_image_descriptors(ctx, shader); 669 struct r600_resource *res; 670 uint32_t *desc = descs->list + slot * 8; 671 672 if (!view || !view->resource) { 673 si_disable_shader_image(ctx, shader, slot); 674 return; 675 } 676 677 res = (struct r600_resource *)view->resource; 678 679 if (&images->views[slot] != view) 680 util_copy_image_view(&images->views[slot], view); 681 682 if (res->b.b.target == PIPE_BUFFER) { 683 if (view->access & PIPE_IMAGE_ACCESS_WRITE) 684 si_mark_image_range_valid(view); 685 686 si_make_buffer_descriptor(screen, res, 687 view->format, 688 view->u.buf.offset, 689 view->u.buf.size, 690 descs->list + slot * 8); 691 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4); 692 693 images->compressed_colortex_mask &= ~(1 << slot); 694 res->bind_history |= PIPE_BIND_SHADER_IMAGE; 695 } else { 696 static const unsigned char swizzle[4] = { 0, 1, 2, 3 }; 697 struct r600_texture *tex = (struct r600_texture *)res; 698 unsigned level = view->u.tex.level; 699 unsigned width, height, depth; 700 bool uses_dcc = tex->dcc_offset && 701 level < tex->surface.num_dcc_levels; 702 703 assert(!tex->is_depth); 704 assert(tex->fmask.size == 0); 705 706 if (uses_dcc && !skip_decompress && 707 (view->access & PIPE_IMAGE_ACCESS_WRITE || 708 !vi_dcc_formats_compatible(res->b.b.format, view->format))) { 709 /* If DCC can't be disabled, at least decompress it. 710 * The decompression is relatively cheap if the surface 711 * has been decompressed already. 
712 */ 713 if (r600_texture_disable_dcc(&ctx->b, tex)) 714 uses_dcc = false; 715 else 716 ctx->b.decompress_dcc(&ctx->b.b, tex); 717 } 718 719 if (is_compressed_colortex(tex)) { 720 images->compressed_colortex_mask |= 1 << slot; 721 } else { 722 images->compressed_colortex_mask &= ~(1 << slot); 723 } 724 725 if (uses_dcc && 726 p_atomic_read(&tex->framebuffers_bound)) 727 ctx->need_check_render_feedback = true; 728 729 /* Always force the base level to the selected level. 730 * 731 * This is required for 3D textures, where otherwise 732 * selecting a single slice for non-layered bindings 733 * fails. It doesn't hurt the other targets. 734 */ 735 width = u_minify(res->b.b.width0, level); 736 height = u_minify(res->b.b.height0, level); 737 depth = u_minify(res->b.b.depth0, level); 738 739 si_make_texture_descriptor(screen, tex, 740 false, res->b.b.target, 741 view->format, swizzle, 742 0, 0, 743 view->u.tex.first_layer, 744 view->u.tex.last_layer, 745 width, height, depth, 746 desc, NULL); 747 si_set_mutable_tex_desc_fields(tex, &tex->surface.level[level], 748 level, level, 749 util_format_get_blockwidth(view->format), 750 false, desc); 751 } 752 753 images->enabled_mask |= 1u << slot; 754 descs->dirty_mask |= 1u << slot; 755 ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader); 756 757 /* Since this can flush, it must be done after enabled_mask is updated. */ 758 si_sampler_view_add_buffer(ctx, &res->b.b, 759 RADEON_USAGE_READWRITE, false, true); 760 } 761 762 static void 763 si_set_shader_images(struct pipe_context *pipe, 764 enum pipe_shader_type shader, 765 unsigned start_slot, unsigned count, 766 const struct pipe_image_view *views) 767 { 768 struct si_context *ctx = (struct si_context *)pipe; 769 unsigned i, slot; 770 771 assert(shader < SI_NUM_SHADERS); 772 773 if (!count) 774 return; 775 776 assert(start_slot + count <= SI_NUM_IMAGES); 777 778 if (views) { 779 for (i = 0, slot = start_slot; i < count; ++i, ++slot) 780 si_set_shader_image(ctx, shader, slot, &views[i], false); 781 } else { 782 for (i = 0, slot = start_slot; i < count; ++i, ++slot) 783 si_set_shader_image(ctx, shader, slot, NULL, false); 784 } 785 786 si_update_compressed_tex_shader_mask(ctx, shader); 787 } 788 789 static void 790 si_images_update_compressed_colortex_mask(struct si_images_info *images) 791 { 792 unsigned mask = images->enabled_mask; 793 794 while (mask) { 795 int i = u_bit_scan(&mask); 796 struct pipe_resource *res = images->views[i].resource; 797 798 if (res && res->target != PIPE_BUFFER) { 799 struct r600_texture *rtex = (struct r600_texture *)res; 800 801 if (is_compressed_colortex(rtex)) { 802 images->compressed_colortex_mask |= 1 << i; 803 } else { 804 images->compressed_colortex_mask &= ~(1 << i); 805 } 806 } 807 } 808 } 809 810 /* SAMPLER STATES */ 811 812 static void si_bind_sampler_states(struct pipe_context *ctx, 813 enum pipe_shader_type shader, 814 unsigned start, unsigned count, void **states) 815 { 816 struct si_context *sctx = (struct si_context *)ctx; 817 struct si_textures_info *samplers = &sctx->samplers[shader]; 818 struct si_descriptors *desc = si_sampler_descriptors(sctx, shader); 819 struct si_sampler_state **sstates = (struct si_sampler_state**)states; 820 int i; 821 822 if (!count || shader >= SI_NUM_SHADERS) 823 return; 824 825 for (i = 0; i < count; i++) { 826 unsigned slot = start + i; 827 828 if (!sstates[i] || 829 sstates[i] == samplers->views.sampler_states[slot]) 830 continue; 831 832 #ifdef DEBUG 833 assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC); 834 
#endif 835 samplers->views.sampler_states[slot] = sstates[i]; 836 837 /* If FMASK is bound, don't overwrite it. 838 * The sampler state will be set after FMASK is unbound. 839 */ 840 if (samplers->views.views[slot] && 841 samplers->views.views[slot]->texture && 842 samplers->views.views[slot]->texture->target != PIPE_BUFFER && 843 ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size) 844 continue; 845 846 memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4); 847 desc->dirty_mask |= 1u << slot; 848 sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader); 849 } 850 } 851 852 /* BUFFER RESOURCES */ 853 854 static void si_init_buffer_resources(struct si_buffer_resources *buffers, 855 struct si_descriptors *descs, 856 unsigned num_buffers, 857 unsigned shader_userdata_index, 858 enum radeon_bo_usage shader_usage, 859 enum radeon_bo_priority priority, 860 unsigned *ce_offset) 861 { 862 buffers->shader_usage = shader_usage; 863 buffers->priority = priority; 864 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); 865 866 si_init_descriptors(descs, shader_userdata_index, 4, 867 num_buffers, NULL, ce_offset); 868 } 869 870 static void si_release_buffer_resources(struct si_buffer_resources *buffers, 871 struct si_descriptors *descs) 872 { 873 int i; 874 875 for (i = 0; i < descs->num_elements; i++) { 876 pipe_resource_reference(&buffers->buffers[i], NULL); 877 } 878 879 FREE(buffers->buffers); 880 } 881 882 static void si_buffer_resources_begin_new_cs(struct si_context *sctx, 883 struct si_buffer_resources *buffers) 884 { 885 unsigned mask = buffers->enabled_mask; 886 887 /* Add buffers to the CS. */ 888 while (mask) { 889 int i = u_bit_scan(&mask); 890 891 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 892 (struct r600_resource*)buffers->buffers[i], 893 buffers->shader_usage, buffers->priority); 894 } 895 } 896 897 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers, 898 struct si_descriptors *descs, 899 unsigned idx, struct pipe_resource **buf, 900 unsigned *offset, unsigned *size) 901 { 902 pipe_resource_reference(buf, buffers->buffers[idx]); 903 if (*buf) { 904 struct r600_resource *res = r600_resource(*buf); 905 const uint32_t *desc = descs->list + idx * 4; 906 uint64_t va; 907 908 *size = desc[2]; 909 910 assert(G_008F04_STRIDE(desc[1]) == 0); 911 va = ((uint64_t)desc[1] << 32) | desc[0]; 912 913 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size); 914 *offset = va - res->gpu_address; 915 } 916 } 917 918 /* VERTEX BUFFERS */ 919 920 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) 921 { 922 struct si_descriptors *desc = &sctx->vertex_buffers; 923 int count = sctx->vertex_elements ? 
sctx->vertex_elements->count : 0; 924 int i; 925 926 for (i = 0; i < count; i++) { 927 int vb = sctx->vertex_elements->elements[i].vertex_buffer_index; 928 929 if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) 930 continue; 931 if (!sctx->vertex_buffer[vb].buffer) 932 continue; 933 934 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 935 (struct r600_resource*)sctx->vertex_buffer[vb].buffer, 936 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); 937 } 938 939 if (!desc->buffer) 940 return; 941 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 942 desc->buffer, RADEON_USAGE_READ, 943 RADEON_PRIO_DESCRIPTORS); 944 } 945 946 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) 947 { 948 struct si_vertex_element *velems = sctx->vertex_elements; 949 struct si_descriptors *desc = &sctx->vertex_buffers; 950 unsigned i, count = velems->count; 951 uint64_t va; 952 uint32_t *ptr; 953 954 if (!sctx->vertex_buffers_dirty || !count || !velems) 955 return true; 956 957 unsigned fix_size3 = velems->fix_size3; 958 unsigned first_vb_use_mask = velems->first_vb_use_mask; 959 960 /* Vertex buffer descriptors are the only ones which are uploaded 961 * directly through a staging buffer and don't go through 962 * the fine-grained upload path. 963 */ 964 u_upload_alloc(sctx->b.uploader, 0, count * 16, 256, &desc->buffer_offset, 965 (struct pipe_resource**)&desc->buffer, (void**)&ptr); 966 if (!desc->buffer) 967 return false; 968 969 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 970 desc->buffer, RADEON_USAGE_READ, 971 RADEON_PRIO_DESCRIPTORS); 972 973 assert(count <= SI_NUM_VERTEX_BUFFERS); 974 975 for (i = 0; i < count; i++) { 976 struct pipe_vertex_element *ve = &velems->elements[i]; 977 struct pipe_vertex_buffer *vb; 978 struct r600_resource *rbuffer; 979 unsigned offset; 980 unsigned vbo_index = ve->vertex_buffer_index; 981 uint32_t *desc = &ptr[i*4]; 982 983 vb = &sctx->vertex_buffer[vbo_index]; 984 rbuffer = (struct r600_resource*)vb->buffer; 985 if (!rbuffer) { 986 memset(desc, 0, 16); 987 continue; 988 } 989 990 offset = vb->buffer_offset + ve->src_offset; 991 va = rbuffer->gpu_address + offset; 992 993 /* Fill in T# buffer resource description */ 994 desc[0] = va; 995 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 996 S_008F04_STRIDE(vb->stride); 997 998 if (sctx->b.chip_class <= CIK && vb->stride) { 999 /* Round up by rounding down and adding 1 */ 1000 desc[2] = (vb->buffer->width0 - offset - 1001 velems->format_size[i]) / 1002 vb->stride + 1; 1003 } else { 1004 uint32_t size3; 1005 1006 desc[2] = vb->buffer->width0 - offset; 1007 1008 /* For attributes of size 3 with byte or short 1009 * components, we use a 4-component data format. 1010 * 1011 * As a consequence, we have to round the buffer size 1012 * up so that the hardware sees four components as 1013 * being inside the buffer if and only if the first 1014 * three components are in the buffer. 1015 * 1016 * Since the offset and stride are guaranteed to be 1017 * 4-byte aligned, this alignment will never cross the 1018 * winsys buffer boundary. 1019 */ 1020 size3 = (fix_size3 >> (2 * i)) & 3; 1021 if (vb->stride && size3) { 1022 assert(offset % 4 == 0 && vb->stride % 4 == 0); 1023 assert(size3 <= 2); 1024 desc[2] = align(desc[2], size3 * 2); 1025 } 1026 } 1027 1028 desc[3] = velems->rsrc_word3[i]; 1029 1030 if (first_vb_use_mask & (1 << i)) { 1031 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 1032 (struct r600_resource*)vb->buffer, 1033 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); 1034 } 1035 } 1036 1037 /* Don't flush the const cache. 
It would have a very negative effect 1038 * on performance (confirmed by testing). New descriptors are always 1039 * uploaded to a fresh new buffer, so I don't think flushing the const 1040 * cache is needed. */ 1041 si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); 1042 sctx->vertex_buffers_dirty = false; 1043 sctx->vertex_buffer_pointer_dirty = true; 1044 return true; 1045 } 1046 1047 1048 /* CONSTANT BUFFERS */ 1049 1050 static unsigned 1051 si_const_buffer_descriptors_idx(unsigned shader) 1052 { 1053 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + 1054 SI_SHADER_DESCS_CONST_BUFFERS; 1055 } 1056 1057 static struct si_descriptors * 1058 si_const_buffer_descriptors(struct si_context *sctx, unsigned shader) 1059 { 1060 return &sctx->descriptors[si_const_buffer_descriptors_idx(shader)]; 1061 } 1062 1063 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, 1064 const uint8_t *ptr, unsigned size, uint32_t *const_offset) 1065 { 1066 void *tmp; 1067 1068 u_upload_alloc(sctx->b.uploader, 0, size, 256, const_offset, 1069 (struct pipe_resource**)rbuffer, &tmp); 1070 if (*rbuffer) 1071 util_memcpy_cpu_to_le32(tmp, ptr, size); 1072 } 1073 1074 static void si_set_constant_buffer(struct si_context *sctx, 1075 struct si_buffer_resources *buffers, 1076 unsigned descriptors_idx, 1077 uint slot, const struct pipe_constant_buffer *input) 1078 { 1079 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; 1080 assert(slot < descs->num_elements); 1081 pipe_resource_reference(&buffers->buffers[slot], NULL); 1082 1083 /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy 1084 * with a NULL buffer). We need to use a dummy buffer instead. */ 1085 if (sctx->b.chip_class == CIK && 1086 (!input || (!input->buffer && !input->user_buffer))) 1087 input = &sctx->null_const_buf; 1088 1089 if (input && (input->buffer || input->user_buffer)) { 1090 struct pipe_resource *buffer = NULL; 1091 uint64_t va; 1092 1093 /* Upload the user buffer if needed. */ 1094 if (input->user_buffer) { 1095 unsigned buffer_offset; 1096 1097 si_upload_const_buffer(sctx, 1098 (struct r600_resource**)&buffer, input->user_buffer, 1099 input->buffer_size, &buffer_offset); 1100 if (!buffer) { 1101 /* Just unbind on failure. */ 1102 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL); 1103 return; 1104 } 1105 va = r600_resource(buffer)->gpu_address + buffer_offset; 1106 } else { 1107 pipe_resource_reference(&buffer, input->buffer); 1108 va = r600_resource(buffer)->gpu_address + input->buffer_offset; 1109 /* Only track usage for non-user buffers. */ 1110 r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER; 1111 } 1112 1113 /* Set the descriptor. */ 1114 uint32_t *desc = descs->list + slot*4; 1115 desc[0] = va; 1116 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 1117 S_008F04_STRIDE(0); 1118 desc[2] = input->buffer_size; 1119 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 1120 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1121 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 1122 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 1123 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 1124 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 1125 1126 buffers->buffers[slot] = buffer; 1127 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 1128 (struct r600_resource*)buffer, 1129 buffers->shader_usage, 1130 buffers->priority, true); 1131 buffers->enabled_mask |= 1u << slot; 1132 } else { 1133 /* Clear the descriptor. 
*/ 1134 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4); 1135 buffers->enabled_mask &= ~(1u << slot); 1136 } 1137 1138 descs->dirty_mask |= 1u << slot; 1139 sctx->descriptors_dirty |= 1u << descriptors_idx; 1140 } 1141 1142 void si_set_rw_buffer(struct si_context *sctx, 1143 uint slot, const struct pipe_constant_buffer *input) 1144 { 1145 si_set_constant_buffer(sctx, &sctx->rw_buffers, 1146 SI_DESCS_RW_BUFFERS, slot, input); 1147 } 1148 1149 static void si_pipe_set_constant_buffer(struct pipe_context *ctx, 1150 uint shader, uint slot, 1151 const struct pipe_constant_buffer *input) 1152 { 1153 struct si_context *sctx = (struct si_context *)ctx; 1154 1155 if (shader >= SI_NUM_SHADERS) 1156 return; 1157 1158 si_set_constant_buffer(sctx, &sctx->const_buffers[shader], 1159 si_const_buffer_descriptors_idx(shader), 1160 slot, input); 1161 } 1162 1163 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, 1164 uint slot, struct pipe_constant_buffer *cbuf) 1165 { 1166 cbuf->user_buffer = NULL; 1167 si_get_buffer_from_descriptors( 1168 &sctx->const_buffers[shader], 1169 si_const_buffer_descriptors(sctx, shader), 1170 slot, &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size); 1171 } 1172 1173 /* SHADER BUFFERS */ 1174 1175 static unsigned 1176 si_shader_buffer_descriptors_idx(enum pipe_shader_type shader) 1177 { 1178 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + 1179 SI_SHADER_DESCS_SHADER_BUFFERS; 1180 } 1181 1182 static struct si_descriptors * 1183 si_shader_buffer_descriptors(struct si_context *sctx, 1184 enum pipe_shader_type shader) 1185 { 1186 return &sctx->descriptors[si_shader_buffer_descriptors_idx(shader)]; 1187 } 1188 1189 static void si_set_shader_buffers(struct pipe_context *ctx, 1190 enum pipe_shader_type shader, 1191 unsigned start_slot, unsigned count, 1192 const struct pipe_shader_buffer *sbuffers) 1193 { 1194 struct si_context *sctx = (struct si_context *)ctx; 1195 struct si_buffer_resources *buffers = &sctx->shader_buffers[shader]; 1196 struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader); 1197 unsigned i; 1198 1199 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); 1200 1201 for (i = 0; i < count; ++i) { 1202 const struct pipe_shader_buffer *sbuffer = sbuffers ? 
&sbuffers[i] : NULL; 1203 struct r600_resource *buf; 1204 unsigned slot = start_slot + i; 1205 uint32_t *desc = descs->list + slot * 4; 1206 uint64_t va; 1207 1208 if (!sbuffer || !sbuffer->buffer) { 1209 pipe_resource_reference(&buffers->buffers[slot], NULL); 1210 memset(desc, 0, sizeof(uint32_t) * 4); 1211 buffers->enabled_mask &= ~(1u << slot); 1212 descs->dirty_mask |= 1u << slot; 1213 sctx->descriptors_dirty |= 1214 1u << si_shader_buffer_descriptors_idx(shader); 1215 continue; 1216 } 1217 1218 buf = (struct r600_resource *)sbuffer->buffer; 1219 va = buf->gpu_address + sbuffer->buffer_offset; 1220 1221 desc[0] = va; 1222 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 1223 S_008F04_STRIDE(0); 1224 desc[2] = sbuffer->buffer_size; 1225 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 1226 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1227 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 1228 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 1229 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 1230 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 1231 1232 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); 1233 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf, 1234 buffers->shader_usage, 1235 buffers->priority, true); 1236 buf->bind_history |= PIPE_BIND_SHADER_BUFFER; 1237 1238 buffers->enabled_mask |= 1u << slot; 1239 descs->dirty_mask |= 1u << slot; 1240 sctx->descriptors_dirty |= 1241 1u << si_shader_buffer_descriptors_idx(shader); 1242 1243 util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset, 1244 sbuffer->buffer_offset + sbuffer->buffer_size); 1245 } 1246 } 1247 1248 void si_get_shader_buffers(struct si_context *sctx, uint shader, 1249 uint start_slot, uint count, 1250 struct pipe_shader_buffer *sbuf) 1251 { 1252 struct si_buffer_resources *buffers = &sctx->shader_buffers[shader]; 1253 struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader); 1254 1255 for (unsigned i = 0; i < count; ++i) { 1256 si_get_buffer_from_descriptors( 1257 buffers, descs, start_slot + i, 1258 &sbuf[i].buffer, &sbuf[i].buffer_offset, 1259 &sbuf[i].buffer_size); 1260 } 1261 } 1262 1263 /* RING BUFFERS */ 1264 1265 void si_set_ring_buffer(struct pipe_context *ctx, uint slot, 1266 struct pipe_resource *buffer, 1267 unsigned stride, unsigned num_records, 1268 bool add_tid, bool swizzle, 1269 unsigned element_size, unsigned index_stride, uint64_t offset) 1270 { 1271 struct si_context *sctx = (struct si_context *)ctx; 1272 struct si_buffer_resources *buffers = &sctx->rw_buffers; 1273 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; 1274 1275 /* The stride field in the resource descriptor has 14 bits */ 1276 assert(stride < (1 << 14)); 1277 1278 assert(slot < descs->num_elements); 1279 pipe_resource_reference(&buffers->buffers[slot], NULL); 1280 1281 if (buffer) { 1282 uint64_t va; 1283 1284 va = r600_resource(buffer)->gpu_address + offset; 1285 1286 switch (element_size) { 1287 default: 1288 assert(!"Unsupported ring buffer element size"); 1289 case 0: 1290 case 2: 1291 element_size = 0; 1292 break; 1293 case 4: 1294 element_size = 1; 1295 break; 1296 case 8: 1297 element_size = 2; 1298 break; 1299 case 16: 1300 element_size = 3; 1301 break; 1302 } 1303 1304 switch (index_stride) { 1305 default: 1306 assert(!"Unsupported ring buffer index stride"); 1307 case 0: 1308 case 8: 1309 index_stride = 0; 1310 break; 1311 case 16: 1312 index_stride = 1; 1313 break; 1314 case 32: 1315 index_stride = 2; 1316 break; 1317 case 64: 1318 index_stride = 3; 1319 break; 
		}

		if (sctx->b.chip_class >= VI && stride)
			num_records *= stride;

		/* Set the descriptor. */
		uint32_t *desc = descs->list + slot*4;
		desc[0] = va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
			  S_008F04_STRIDE(stride) |
			  S_008F04_SWIZZLE_ENABLE(swizzle);
		desc[2] = num_records;
		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
			  S_008F0C_ELEMENT_SIZE(element_size) |
			  S_008F0C_INDEX_STRIDE(index_stride) |
			  S_008F0C_ADD_TID_ENABLE(add_tid);

		pipe_resource_reference(&buffers->buffers[slot], buffer);
		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
					  (struct r600_resource*)buffer,
					  buffers->shader_usage, buffers->priority);
		buffers->enabled_mask |= 1u << slot;
	} else {
		/* Clear the descriptor. */
		memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
		buffers->enabled_mask &= ~(1u << slot);
	}

	descs->dirty_mask |= 1u << slot;
	sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
}

/* STREAMOUT BUFFERS */

static void si_set_streamout_targets(struct pipe_context *ctx,
				     unsigned num_targets,
				     struct pipe_stream_output_target **targets,
				     const unsigned *offsets)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_buffer_resources *buffers = &sctx->rw_buffers;
	struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
	unsigned old_num_targets = sctx->b.streamout.num_targets;
	unsigned i, bufidx;

	/* We are going to unbind the buffers. Mark which caches need to be flushed. */
	if (sctx->b.streamout.num_targets && sctx->b.streamout.begin_emitted) {
		/* Since streamout uses vector writes which go through TC L2
		 * and most other clients can use TC L2 as well, we don't need
		 * to flush it.
		 *
		 * The only cases which require flushing it are VGT DMA index
		 * fetching (on <= CIK) and indirect draw data, which are rare
		 * cases. Thus, flag the TC L2 dirtiness in the resource and
		 * handle it at draw call time.
		 */
		for (i = 0; i < sctx->b.streamout.num_targets; i++)
			if (sctx->b.streamout.targets[i])
				r600_resource(sctx->b.streamout.targets[i]->b.buffer)->TC_L2_dirty = true;

		/* Invalidate the scalar cache in case a streamout buffer is
		 * going to be used as a constant buffer.
		 *
		 * Invalidate TC L1, because streamout bypasses it (done by
		 * setting GLC=1 in the store instruction), but it can contain
		 * outdated data of streamout buffers.
		 *
		 * VS_PARTIAL_FLUSH is required if the buffers are going to be
		 * used as an input immediately.
		 */
		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
				 SI_CONTEXT_INV_VMEM_L1 |
				 SI_CONTEXT_VS_PARTIAL_FLUSH;
	}

	/* All readers of the streamout targets need to be finished before we can
	 * start writing to the targets.
	 */
	if (num_targets)
		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
				 SI_CONTEXT_CS_PARTIAL_FLUSH;

	/* Streamout buffers must be bound in 2 places:
	 * 1) in VGT by setting the VGT_STRMOUT registers
	 * 2) as shader resources
	 */

	/* Set the VGT regs.
*/ 1413 r600_set_streamout_targets(ctx, num_targets, targets, offsets); 1414 1415 /* Set the shader resources.*/ 1416 for (i = 0; i < num_targets; i++) { 1417 bufidx = SI_VS_STREAMOUT_BUF0 + i; 1418 1419 if (targets[i]) { 1420 struct pipe_resource *buffer = targets[i]->buffer; 1421 uint64_t va = r600_resource(buffer)->gpu_address; 1422 1423 /* Set the descriptor. 1424 * 1425 * On VI, the format must be non-INVALID, otherwise 1426 * the buffer will be considered not bound and store 1427 * instructions will be no-ops. 1428 */ 1429 uint32_t *desc = descs->list + bufidx*4; 1430 desc[0] = va; 1431 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); 1432 desc[2] = 0xffffffff; 1433 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 1434 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1435 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 1436 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 1437 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 1438 1439 /* Set the resource. */ 1440 pipe_resource_reference(&buffers->buffers[bufidx], 1441 buffer); 1442 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 1443 (struct r600_resource*)buffer, 1444 buffers->shader_usage, 1445 RADEON_PRIO_SHADER_RW_BUFFER, 1446 true); 1447 r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT; 1448 1449 buffers->enabled_mask |= 1u << bufidx; 1450 } else { 1451 /* Clear the descriptor and unset the resource. */ 1452 memset(descs->list + bufidx*4, 0, 1453 sizeof(uint32_t) * 4); 1454 pipe_resource_reference(&buffers->buffers[bufidx], 1455 NULL); 1456 buffers->enabled_mask &= ~(1u << bufidx); 1457 } 1458 descs->dirty_mask |= 1u << bufidx; 1459 } 1460 for (; i < old_num_targets; i++) { 1461 bufidx = SI_VS_STREAMOUT_BUF0 + i; 1462 /* Clear the descriptor and unset the resource. */ 1463 memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4); 1464 pipe_resource_reference(&buffers->buffers[bufidx], NULL); 1465 buffers->enabled_mask &= ~(1u << bufidx); 1466 descs->dirty_mask |= 1u << bufidx; 1467 } 1468 1469 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; 1470 } 1471 1472 static void si_desc_reset_buffer_offset(struct pipe_context *ctx, 1473 uint32_t *desc, uint64_t old_buf_va, 1474 struct pipe_resource *new_buf) 1475 { 1476 /* Retrieve the buffer offset from the descriptor. */ 1477 uint64_t old_desc_va = 1478 desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32); 1479 1480 assert(old_buf_va <= old_desc_va); 1481 uint64_t offset_within_buffer = old_desc_va - old_buf_va; 1482 1483 /* Update the descriptor. */ 1484 si_set_buf_desc_address(r600_resource(new_buf), offset_within_buffer, 1485 desc); 1486 } 1487 1488 /* INTERNAL CONST BUFFERS */ 1489 1490 static void si_set_polygon_stipple(struct pipe_context *ctx, 1491 const struct pipe_poly_stipple *state) 1492 { 1493 struct si_context *sctx = (struct si_context *)ctx; 1494 struct pipe_constant_buffer cb = {}; 1495 unsigned stipple[32]; 1496 int i; 1497 1498 for (i = 0; i < 32; i++) 1499 stipple[i] = util_bitreverse(state->stipple[i]); 1500 1501 cb.user_buffer = stipple; 1502 cb.buffer_size = sizeof(stipple); 1503 1504 si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb); 1505 } 1506 1507 /* TEXTURE METADATA ENABLE/DISABLE */ 1508 1509 /* CMASK can be enabled (for fast clear) and disabled (for texture export) 1510 * while the texture is bound, possibly by a different context. In that case, 1511 * call this function to update compressed_colortex_masks. 
 */
void si_update_compressed_colortex_masks(struct si_context *sctx)
{
	for (int i = 0; i < SI_NUM_SHADERS; ++i) {
		si_samplers_update_compressed_colortex_mask(&sctx->samplers[i]);
		si_images_update_compressed_colortex_mask(&sctx->images[i]);
		si_update_compressed_tex_shader_mask(sctx, i);
	}
}

/* BUFFER DISCARD/INVALIDATION */

/** Reset descriptors of buffer resources after \p buf has been invalidated. */
static void si_reset_buffer_resources(struct si_context *sctx,
				      struct si_buffer_resources *buffers,
				      unsigned descriptors_idx,
				      struct pipe_resource *buf,
				      uint64_t old_va)
{
	struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
	unsigned mask = buffers->enabled_mask;

	while (mask) {
		unsigned i = u_bit_scan(&mask);
		if (buffers->buffers[i] == buf) {
			si_desc_reset_buffer_offset(&sctx->b.b,
						    descs->list + i*4,
						    old_va, buf);
			descs->dirty_mask |= 1u << i;
			sctx->descriptors_dirty |= 1u << descriptors_idx;

			radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
							    (struct r600_resource *)buf,
							    buffers->shader_usage,
							    buffers->priority, true);
		}
	}
}

/* Reallocate a buffer and update all resource bindings where the buffer is
 * bound.
 *
 * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
 * idle by discarding its contents. Apps usually tell us when to do this using
 * map_buffer flags, for example.
 */
static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
{
	struct si_context *sctx = (struct si_context*)ctx;
	struct r600_resource *rbuffer = r600_resource(buf);
	unsigned i, shader;
	uint64_t old_va = rbuffer->gpu_address;
	unsigned num_elems = sctx->vertex_elements ?
				     sctx->vertex_elements->count : 0;

	/* Reallocate the buffer in the same pipe_resource. */
	r600_alloc_resource(&sctx->screen->b, rbuffer);

	/* We changed the buffer, now we need to bind it where the old one
	 * was bound. This consists of 2 things:
	 *   1) Updating the resource descriptor and dirtying it.
	 *   2) Adding a relocation to the CS, so that it's usable.
	 */

	/* Vertex buffers. */
	if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
		for (i = 0; i < num_elems; i++) {
			int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;

			if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
				continue;
			if (!sctx->vertex_buffer[vb].buffer)
				continue;

			if (sctx->vertex_buffer[vb].buffer == buf) {
				sctx->vertex_buffers_dirty = true;
				break;
			}
		}
	}

	/* Streamout buffers. (other internal buffers can't be invalidated) */
	if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
		for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
			struct si_buffer_resources *buffers = &sctx->rw_buffers;
			struct si_descriptors *descs =
				&sctx->descriptors[SI_DESCS_RW_BUFFERS];

			if (buffers->buffers[i] != buf)
				continue;

			si_desc_reset_buffer_offset(ctx, descs->list + i*4,
						    old_va, buf);
			descs->dirty_mask |= 1u << i;
			sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;

			radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
							    rbuffer, buffers->shader_usage,
							    RADEON_PRIO_SHADER_RW_BUFFER,
							    true);

			/* Update the streamout state.
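			 * This re-emits the VGT streamout registers so they
			 * point at the reallocated buffer instead of the old
			 * one.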
*/ 1614 if (sctx->b.streamout.begin_emitted) 1615 r600_emit_streamout_end(&sctx->b); 1616 sctx->b.streamout.append_bitmask = 1617 sctx->b.streamout.enabled_mask; 1618 r600_streamout_buffers_dirty(&sctx->b); 1619 } 1620 } 1621 1622 /* Constant and shader buffers. */ 1623 if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { 1624 for (shader = 0; shader < SI_NUM_SHADERS; shader++) 1625 si_reset_buffer_resources(sctx, &sctx->const_buffers[shader], 1626 si_const_buffer_descriptors_idx(shader), 1627 buf, old_va); 1628 } 1629 1630 if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) { 1631 for (shader = 0; shader < SI_NUM_SHADERS; shader++) 1632 si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader], 1633 si_shader_buffer_descriptors_idx(shader), 1634 buf, old_va); 1635 } 1636 1637 if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { 1638 /* Texture buffers - update bindings. */ 1639 for (shader = 0; shader < SI_NUM_SHADERS; shader++) { 1640 struct si_sampler_views *views = &sctx->samplers[shader].views; 1641 struct si_descriptors *descs = 1642 si_sampler_descriptors(sctx, shader); 1643 unsigned mask = views->enabled_mask; 1644 1645 while (mask) { 1646 unsigned i = u_bit_scan(&mask); 1647 if (views->views[i]->texture == buf) { 1648 si_desc_reset_buffer_offset(ctx, 1649 descs->list + 1650 i * 16 + 4, 1651 old_va, buf); 1652 descs->dirty_mask |= 1u << i; 1653 sctx->descriptors_dirty |= 1654 1u << si_sampler_descriptors_idx(shader); 1655 1656 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, 1657 rbuffer, RADEON_USAGE_READ, 1658 RADEON_PRIO_SAMPLER_BUFFER, 1659 true); 1660 } 1661 } 1662 } 1663 } 1664 1665 /* Shader images */ 1666 if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) { 1667 for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { 1668 struct si_images_info *images = &sctx->images[shader]; 1669 struct si_descriptors *descs = 1670 si_image_descriptors(sctx, shader); 1671 unsigned mask = images->enabled_mask; 1672 1673 while (mask) { 1674 unsigned i = u_bit_scan(&mask); 1675 1676 if (images->views[i].resource == buf) { 1677 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE) 1678 si_mark_image_range_valid(&images->views[i]); 1679 1680 si_desc_reset_buffer_offset( 1681 ctx, descs->list + i * 8 + 4, 1682 old_va, buf); 1683 descs->dirty_mask |= 1u << i; 1684 sctx->descriptors_dirty |= 1685 1u << si_image_descriptors_idx(shader); 1686 1687 radeon_add_to_buffer_list_check_mem( 1688 &sctx->b, &sctx->b.gfx, rbuffer, 1689 RADEON_USAGE_READWRITE, 1690 RADEON_PRIO_SAMPLER_BUFFER, true); 1691 } 1692 } 1693 } 1694 } 1695 } 1696 1697 /* Update mutable image descriptor fields of all bound textures. */ 1698 void si_update_all_texture_descriptors(struct si_context *sctx) 1699 { 1700 unsigned shader; 1701 1702 for (shader = 0; shader < SI_NUM_SHADERS; shader++) { 1703 struct si_sampler_views *samplers = &sctx->samplers[shader].views; 1704 struct si_images_info *images = &sctx->images[shader]; 1705 unsigned mask; 1706 1707 /* Images. */ 1708 mask = images->enabled_mask; 1709 while (mask) { 1710 unsigned i = u_bit_scan(&mask); 1711 struct pipe_image_view *view = &images->views[i]; 1712 1713 if (!view->resource || 1714 view->resource->target == PIPE_BUFFER) 1715 continue; 1716 1717 si_set_shader_image(sctx, shader, i, view, true); 1718 } 1719 1720 /* Sampler views. 
*/ 1721 mask = samplers->enabled_mask; 1722 while (mask) { 1723 unsigned i = u_bit_scan(&mask); 1724 struct pipe_sampler_view *view = samplers->views[i]; 1725 1726 if (!view || 1727 !view->texture || 1728 view->texture->target == PIPE_BUFFER) 1729 continue; 1730 1731 si_set_sampler_view(sctx, shader, i, 1732 samplers->views[i], true); 1733 } 1734 1735 si_update_compressed_tex_shader_mask(sctx, shader); 1736 } 1737 } 1738 1739 /* SHADER USER DATA */ 1740 1741 static void si_mark_shader_pointers_dirty(struct si_context *sctx, 1742 unsigned shader) 1743 { 1744 sctx->shader_pointers_dirty |= 1745 u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, 1746 SI_NUM_SHADER_DESCS); 1747 1748 if (shader == PIPE_SHADER_VERTEX) 1749 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL; 1750 1751 si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); 1752 } 1753 1754 static void si_shader_userdata_begin_new_cs(struct si_context *sctx) 1755 { 1756 sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS); 1757 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL; 1758 si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); 1759 } 1760 1761 /* Set a base register address for user data constants in the given shader. 1762 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*. 1763 */ 1764 static void si_set_user_data_base(struct si_context *sctx, 1765 unsigned shader, uint32_t new_base) 1766 { 1767 uint32_t *base = &sctx->shader_userdata.sh_base[shader]; 1768 1769 if (*base != new_base) { 1770 *base = new_base; 1771 1772 if (new_base) 1773 si_mark_shader_pointers_dirty(sctx, shader); 1774 } 1775 } 1776 1777 /* This must be called when these shaders are changed from non-NULL to NULL 1778 * and vice versa: 1779 * - geometry shader 1780 * - tessellation control shader 1781 * - tessellation evaluation shader 1782 */ 1783 void si_shader_change_notify(struct si_context *sctx) 1784 { 1785 /* VS can be bound as VS, ES, or LS. */ 1786 if (sctx->tes_shader.cso) 1787 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 1788 R_00B530_SPI_SHADER_USER_DATA_LS_0); 1789 else if (sctx->gs_shader.cso) 1790 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 1791 R_00B330_SPI_SHADER_USER_DATA_ES_0); 1792 else 1793 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 1794 R_00B130_SPI_SHADER_USER_DATA_VS_0); 1795 1796 /* TES can be bound as ES, VS, or not bound. 
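	 * It takes the ES slot when a geometry shader follows, otherwise
	 * it takes the VS slot; with no TES bound, its base is cleared.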
	/* TES can be bound as ES, VS, or not bound. */
	if (sctx->tes_shader.cso) {
		if (sctx->gs_shader.cso)
			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
					      R_00B330_SPI_SHADER_USER_DATA_ES_0);
		else
			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
					      R_00B130_SPI_SHADER_USER_DATA_VS_0);
	} else {
		si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
	}
}

static void si_emit_shader_pointer(struct si_context *sctx,
				   struct si_descriptors *desc,
				   unsigned sh_base)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	uint64_t va;

	assert(desc->buffer);

	va = desc->buffer->gpu_address +
	     desc->buffer_offset;

	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
	radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
	radeon_emit(cs, va);
	radeon_emit(cs, va >> 32);
}

void si_emit_graphics_shader_userdata(struct si_context *sctx,
				      struct r600_atom *atom)
{
	unsigned mask;
	uint32_t *sh_base = sctx->shader_userdata.sh_base;
	struct si_descriptors *descs;

	descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];

	if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
		si_emit_shader_pointer(sctx, descs,
				       R_00B030_SPI_SHADER_USER_DATA_PS_0);
		si_emit_shader_pointer(sctx, descs,
				       R_00B130_SPI_SHADER_USER_DATA_VS_0);
		si_emit_shader_pointer(sctx, descs,
				       R_00B230_SPI_SHADER_USER_DATA_GS_0);
		si_emit_shader_pointer(sctx, descs,
				       R_00B330_SPI_SHADER_USER_DATA_ES_0);
		si_emit_shader_pointer(sctx, descs,
				       R_00B430_SPI_SHADER_USER_DATA_HS_0);
	}

	mask = sctx->shader_pointers_dirty &
	       u_bit_consecutive(SI_DESCS_FIRST_SHADER,
				 SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);

	while (mask) {
		unsigned i = u_bit_scan(&mask);
		unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS;
		unsigned base = sh_base[shader];

		if (base)
			si_emit_shader_pointer(sctx, descs + i, base);
	}
	sctx->shader_pointers_dirty &=
		~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);

	if (sctx->vertex_buffer_pointer_dirty) {
		si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
				       sh_base[PIPE_SHADER_VERTEX]);
		sctx->vertex_buffer_pointer_dirty = false;
	}
}

void si_emit_compute_shader_userdata(struct si_context *sctx)
{
	unsigned base = R_00B900_COMPUTE_USER_DATA_0;
	struct si_descriptors *descs = sctx->descriptors;
	unsigned compute_mask =
		u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_SHADER_DESCS);
	unsigned mask = sctx->shader_pointers_dirty & compute_mask;

	while (mask) {
		unsigned i = u_bit_scan(&mask);

		si_emit_shader_pointer(sctx, descs + i, base);
	}
	sctx->shader_pointers_dirty &= ~compute_mask;
}

/* INIT/DEINIT/UPLOAD */

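/* Typical lifetime of the structures managed below (illustrative summary of
 * how these entry points are meant to be used, not additional driver code):
 *
 *   si_init_all_descriptors()               - once, at context creation
 *   si_all_descriptors_begin_new_cs()       - whenever a new command stream starts
 *   si_upload_graphics_shader_descriptors() - before a draw, if lists are dirty
 *   si_upload_compute_shader_descriptors()  - before a compute dispatch, likewise
 *   si_release_all_descriptors()            - once, at context destruction
 */
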
void si_init_all_descriptors(struct si_context *sctx)
{
	int i;
	unsigned ce_offset = 0;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		si_init_buffer_resources(&sctx->const_buffers[i],
					 si_const_buffer_descriptors(sctx, i),
					 SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
					 RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
					 &ce_offset);
		si_init_buffer_resources(&sctx->shader_buffers[i],
					 si_shader_buffer_descriptors(sctx, i),
					 SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
					 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
					 &ce_offset);

		si_init_descriptors(si_sampler_descriptors(sctx, i),
				    SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
				    null_texture_descriptor, &ce_offset);

		si_init_descriptors(si_image_descriptors(sctx, i),
				    SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
				    null_image_descriptor, &ce_offset);
	}

	si_init_buffer_resources(&sctx->rw_buffers,
				 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
				 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
				 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS,
				 &ce_offset);
	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
			    4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);

	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);

	assert(ce_offset <= 32768);

	/* Set pipe_context functions. */
	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
	sctx->b.b.set_shader_images = si_set_shader_images;
	sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
	sctx->b.b.set_sampler_views = si_set_sampler_views;
	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
	sctx->b.invalidate_buffer = si_invalidate_buffer;

	/* Shader user data. */
	si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
		     si_emit_graphics_shader_userdata);

	/* Set default and immutable mappings. */
	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
	si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
	si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
	si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}

bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
{
	const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
	unsigned dirty = sctx->descriptors_dirty & mask;

	/* Assume nothing will go wrong: */
	sctx->shader_pointers_dirty |= dirty;

	while (dirty) {
		unsigned i = u_bit_scan(&dirty);

		if (!si_upload_descriptors(sctx, &sctx->descriptors[i],
					   &sctx->shader_userdata.atom))
			return false;
	}

	sctx->descriptors_dirty &= ~mask;
	return true;
}

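/* For both upload helpers, a false return value means one of the dirty
 * descriptor lists could not be uploaded (e.g. an upload buffer allocation
 * failed), so the caller should skip the draw or dispatch rather than let
 * shaders read stale descriptors.
 */
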
bool si_upload_compute_shader_descriptors(struct si_context *sctx)
{
	/* Does not update rw_buffers: that is not needed for compute shaders,
	 * and the input buffer uses the same SGPRs anyway.
	 */
	const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
						SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
	unsigned dirty = sctx->descriptors_dirty & mask;

	/* Assume nothing will go wrong: */
	sctx->shader_pointers_dirty |= dirty;

	while (dirty) {
		unsigned i = u_bit_scan(&dirty);

		if (!si_upload_descriptors(sctx, &sctx->descriptors[i], NULL))
			return false;
	}

	sctx->descriptors_dirty &= ~mask;

	return true;
}

void si_release_all_descriptors(struct si_context *sctx)
{
	int i;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		si_release_buffer_resources(&sctx->const_buffers[i],
					    si_const_buffer_descriptors(sctx, i));
		si_release_buffer_resources(&sctx->shader_buffers[i],
					    si_shader_buffer_descriptors(sctx, i));
		si_release_sampler_views(&sctx->samplers[i].views);
		si_release_image_views(&sctx->images[i]);
	}
	si_release_buffer_resources(&sctx->rw_buffers,
				    &sctx->descriptors[SI_DESCS_RW_BUFFERS]);

	for (i = 0; i < SI_NUM_DESCS; ++i)
		si_release_descriptors(&sctx->descriptors[i]);
	si_release_descriptors(&sctx->vertex_buffers);
}

void si_all_descriptors_begin_new_cs(struct si_context *sctx)
{
	int i;

	for (i = 0; i < SI_NUM_SHADERS; i++) {
		si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
		si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
		si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
		si_image_views_begin_new_cs(sctx, &sctx->images[i]);
	}
	si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
	si_vertex_buffers_begin_new_cs(sctx);

	for (i = 0; i < SI_NUM_DESCS; ++i)
		si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);

	si_shader_userdata_begin_new_cs(sctx);
}