1 /* 2 * Copyright 2016 Red Hat. 3 * Copyright 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 28 #include "radv_private.h" 29 #include "radv_radeon_winsys.h" 30 #include "radv_shader.h" 31 #include "radv_cs.h" 32 #include "sid.h" 33 #include "gfx9d.h" 34 #include "vk_format.h" 35 #include "radv_debug.h" 36 #include "radv_meta.h" 37 38 #include "ac_debug.h" 39 40 static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, 41 struct radv_image *image, 42 VkImageLayout src_layout, 43 VkImageLayout dst_layout, 44 uint32_t src_family, 45 uint32_t dst_family, 46 const VkImageSubresourceRange *range, 47 VkImageAspectFlags pending_clears); 48 49 const struct radv_dynamic_state default_dynamic_state = { 50 .viewport = { 51 .count = 0, 52 }, 53 .scissor = { 54 .count = 0, 55 }, 56 .line_width = 1.0f, 57 .depth_bias = { 58 .bias = 0.0f, 59 .clamp = 0.0f, 60 .slope = 0.0f, 61 }, 62 .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, 63 .depth_bounds = { 64 .min = 0.0f, 65 .max = 1.0f, 66 }, 67 .stencil_compare_mask = { 68 .front = ~0u, 69 .back = ~0u, 70 }, 71 .stencil_write_mask = { 72 .front = ~0u, 73 .back = ~0u, 74 }, 75 .stencil_reference = { 76 .front = 0u, 77 .back = 0u, 78 }, 79 }; 80 81 static void 82 radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, 83 const struct radv_dynamic_state *src) 84 { 85 struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic; 86 uint32_t copy_mask = src->mask; 87 uint32_t dest_mask = 0; 88 89 /* Make sure to copy the number of viewports/scissors because they can 90 * only be specified at pipeline creation time. 
91 */ 92 dest->viewport.count = src->viewport.count; 93 dest->scissor.count = src->scissor.count; 94 dest->discard_rectangle.count = src->discard_rectangle.count; 95 96 if (copy_mask & RADV_DYNAMIC_VIEWPORT) { 97 if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, 98 src->viewport.count * sizeof(VkViewport))) { 99 typed_memcpy(dest->viewport.viewports, 100 src->viewport.viewports, 101 src->viewport.count); 102 dest_mask |= RADV_DYNAMIC_VIEWPORT; 103 } 104 } 105 106 if (copy_mask & RADV_DYNAMIC_SCISSOR) { 107 if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, 108 src->scissor.count * sizeof(VkRect2D))) { 109 typed_memcpy(dest->scissor.scissors, 110 src->scissor.scissors, src->scissor.count); 111 dest_mask |= RADV_DYNAMIC_SCISSOR; 112 } 113 } 114 115 if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) { 116 if (dest->line_width != src->line_width) { 117 dest->line_width = src->line_width; 118 dest_mask |= RADV_DYNAMIC_LINE_WIDTH; 119 } 120 } 121 122 if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) { 123 if (memcmp(&dest->depth_bias, &src->depth_bias, 124 sizeof(src->depth_bias))) { 125 dest->depth_bias = src->depth_bias; 126 dest_mask |= RADV_DYNAMIC_DEPTH_BIAS; 127 } 128 } 129 130 if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) { 131 if (memcmp(&dest->blend_constants, &src->blend_constants, 132 sizeof(src->blend_constants))) { 133 typed_memcpy(dest->blend_constants, 134 src->blend_constants, 4); 135 dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS; 136 } 137 } 138 139 if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) { 140 if (memcmp(&dest->depth_bounds, &src->depth_bounds, 141 sizeof(src->depth_bounds))) { 142 dest->depth_bounds = src->depth_bounds; 143 dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS; 144 } 145 } 146 147 if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) { 148 if (memcmp(&dest->stencil_compare_mask, 149 &src->stencil_compare_mask, 150 sizeof(src->stencil_compare_mask))) { 151 dest->stencil_compare_mask = src->stencil_compare_mask; 152 dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK; 153 } 154 } 155 156 if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) { 157 if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask, 158 sizeof(src->stencil_write_mask))) { 159 dest->stencil_write_mask = src->stencil_write_mask; 160 dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK; 161 } 162 } 163 164 if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) { 165 if (memcmp(&dest->stencil_reference, &src->stencil_reference, 166 sizeof(src->stencil_reference))) { 167 dest->stencil_reference = src->stencil_reference; 168 dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE; 169 } 170 } 171 172 if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) { 173 if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles, 174 src->discard_rectangle.count * sizeof(VkRect2D))) { 175 typed_memcpy(dest->discard_rectangle.rectangles, 176 src->discard_rectangle.rectangles, 177 src->discard_rectangle.count); 178 dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE; 179 } 180 } 181 182 cmd_buffer->state.dirty |= dest_mask; 183 } 184 185 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) 186 { 187 return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && 188 cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; 189 } 190 191 enum ring_type radv_queue_family_to_ring(int f) { 192 switch (f) { 193 case RADV_QUEUE_GENERAL: 194 return RING_GFX; 195 case RADV_QUEUE_COMPUTE: 196 return RING_COMPUTE; 197 case RADV_QUEUE_TRANSFER: 198 return RING_DMA; 199 default: 200 unreachable("Unknown queue family"); 201 } 202 } 203 204 
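/* Allocate a command buffer from the pool's allocator, link it into the
 * pool's list of command buffers and create a winsys CS on the hardware
 * ring that matches the pool's queue family (GFX, compute or DMA). When
 * no pool is given, the buffer defaults to the general queue family.
 */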
static VkResult radv_create_cmd_buffer( 205 struct radv_device * device, 206 struct radv_cmd_pool * pool, 207 VkCommandBufferLevel level, 208 VkCommandBuffer* pCommandBuffer) 209 { 210 struct radv_cmd_buffer *cmd_buffer; 211 unsigned ring; 212 cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, 213 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 214 if (cmd_buffer == NULL) 215 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 216 217 cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 218 cmd_buffer->device = device; 219 cmd_buffer->pool = pool; 220 cmd_buffer->level = level; 221 222 if (pool) { 223 list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); 224 cmd_buffer->queue_family_index = pool->queue_family_index; 225 226 } else { 227 /* Init the pool_link so we can safely call list_del when we destroy 228 * the command buffer 229 */ 230 list_inithead(&cmd_buffer->pool_link); 231 cmd_buffer->queue_family_index = RADV_QUEUE_GENERAL; 232 } 233 234 ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index); 235 236 cmd_buffer->cs = device->ws->cs_create(device->ws, ring); 237 if (!cmd_buffer->cs) { 238 vk_free(&cmd_buffer->pool->alloc, cmd_buffer); 239 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 240 } 241 242 *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer); 243 244 list_inithead(&cmd_buffer->upload.list); 245 246 return VK_SUCCESS; 247 } 248 249 static void 250 radv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer) 251 { 252 list_del(&cmd_buffer->pool_link); 253 254 list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, 255 &cmd_buffer->upload.list, list) { 256 cmd_buffer->device->ws->buffer_destroy(up->upload_bo); 257 list_del(&up->list); 258 free(up); 259 } 260 261 if (cmd_buffer->upload.upload_bo) 262 cmd_buffer->device->ws->buffer_destroy(cmd_buffer->upload.upload_bo); 263 cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs); 264 free(cmd_buffer->push_descriptors.set.mapped_ptr); 265 vk_free(&cmd_buffer->pool->alloc, cmd_buffer); 266 } 267 268 static VkResult 269 radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) 270 { 271 272 cmd_buffer->device->ws->cs_reset(cmd_buffer->cs); 273 274 list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, 275 &cmd_buffer->upload.list, list) { 276 cmd_buffer->device->ws->buffer_destroy(up->upload_bo); 277 list_del(&up->list); 278 free(up); 279 } 280 281 cmd_buffer->push_constant_stages = 0; 282 cmd_buffer->scratch_size_needed = 0; 283 cmd_buffer->compute_scratch_size_needed = 0; 284 cmd_buffer->esgs_ring_size_needed = 0; 285 cmd_buffer->gsvs_ring_size_needed = 0; 286 cmd_buffer->tess_rings_needed = false; 287 cmd_buffer->sample_positions_needed = false; 288 289 if (cmd_buffer->upload.upload_bo) 290 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 291 cmd_buffer->upload.upload_bo, 8); 292 cmd_buffer->upload.offset = 0; 293 294 cmd_buffer->record_result = VK_SUCCESS; 295 296 cmd_buffer->ring_offsets_idx = -1; 297 298 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { 299 void *fence_ptr; 300 radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0, 301 &cmd_buffer->gfx9_fence_offset, 302 &fence_ptr); 303 cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo; 304 } 305 306 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL; 307 308 return cmd_buffer->record_result; 309 } 310 311 static bool 312 radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, 313 uint64_t min_needed) 314 { 315 uint64_t new_size; 316 struct radeon_winsys_bo *bo; 317 struct radv_cmd_buffer_upload *upload; 318
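/* Growth policy for the upload buffer: allocate at least 16 KiB, at least
 * min_needed bytes, and at least twice the previous size. The old BO, if
 * any, is kept on upload.list until the command buffer is reset or
 * destroyed, since data already uploaded to it may still be referenced by
 * the CS.
 */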
struct radv_device *device = cmd_buffer->device; 319 320 new_size = MAX2(min_needed, 16 * 1024); 321 new_size = MAX2(new_size, 2 * cmd_buffer->upload.size); 322 323 bo = device->ws->buffer_create(device->ws, 324 new_size, 4096, 325 RADEON_DOMAIN_GTT, 326 RADEON_FLAG_CPU_ACCESS| 327 RADEON_FLAG_NO_INTERPROCESS_SHARING); 328 329 if (!bo) { 330 cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; 331 return false; 332 } 333 334 radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo, 8); 335 if (cmd_buffer->upload.upload_bo) { 336 upload = malloc(sizeof(*upload)); 337 338 if (!upload) { 339 cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 340 device->ws->buffer_destroy(bo); 341 return false; 342 } 343 344 memcpy(upload, &cmd_buffer->upload, sizeof(*upload)); 345 list_add(&upload->list, &cmd_buffer->upload.list); 346 } 347 348 cmd_buffer->upload.upload_bo = bo; 349 cmd_buffer->upload.size = new_size; 350 cmd_buffer->upload.offset = 0; 351 cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo); 352 353 if (!cmd_buffer->upload.map) { 354 cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; 355 return false; 356 } 357 358 return true; 359 } 360 361 bool 362 radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, 363 unsigned size, 364 unsigned alignment, 365 unsigned *out_offset, 366 void **ptr) 367 { 368 uint64_t offset = align(cmd_buffer->upload.offset, alignment); 369 if (offset + size > cmd_buffer->upload.size) { 370 if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) 371 return false; 372 offset = 0; 373 } 374 375 *out_offset = offset; 376 *ptr = cmd_buffer->upload.map + offset; 377 378 cmd_buffer->upload.offset = offset + size; 379 return true; 380 } 381 382 bool 383 radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, 384 unsigned size, unsigned alignment, 385 const void *data, unsigned *out_offset) 386 { 387 uint8_t *ptr; 388 389 if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, alignment, 390 out_offset, (void **)&ptr)) 391 return false; 392 393 if (ptr) 394 memcpy(ptr, data, size); 395 396 return true; 397 } 398 399 static void 400 radv_emit_write_data_packet(struct radeon_winsys_cs *cs, uint64_t va, 401 unsigned count, const uint32_t *data) 402 { 403 radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); 404 radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | 405 S_370_WR_CONFIRM(1) | 406 S_370_ENGINE_SEL(V_370_ME)); 407 radeon_emit(cs, va); 408 radeon_emit(cs, va >> 32); 409 radeon_emit_array(cs, data, count); 410 } 411 412 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) 413 { 414 struct radv_device *device = cmd_buffer->device; 415 struct radeon_winsys_cs *cs = cmd_buffer->cs; 416 uint64_t va; 417 418 va = radv_buffer_get_va(device->trace_bo); 419 if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) 420 va += 4; 421 422 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7); 423 424 ++cmd_buffer->state.trace_id; 425 radv_cs_add_buffer(device->ws, cs, device->trace_bo, 8); 426 radv_emit_write_data_packet(cs, va, 1, &cmd_buffer->state.trace_id); 427 radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 428 radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id)); 429 } 430 431 static void 432 radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer) 433 { 434 if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) { 435 enum radv_cmd_flush_bits flags; 436 437 /* Force wait for graphics/compute engines to be idle. 
*/ 438 flags = RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 439 RADV_CMD_FLAG_CS_PARTIAL_FLUSH; 440 441 si_cs_emit_cache_flush(cmd_buffer->cs, 442 cmd_buffer->device->physical_device->rad_info.chip_class, 443 NULL, 0, 444 radv_cmd_buffer_uses_mec(cmd_buffer), 445 flags); 446 } 447 448 if (unlikely(cmd_buffer->device->trace_bo)) 449 radv_cmd_buffer_trace_emit(cmd_buffer); 450 } 451 452 static void 453 radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, 454 struct radv_pipeline *pipeline, enum ring_type ring) 455 { 456 struct radv_device *device = cmd_buffer->device; 457 struct radeon_winsys_cs *cs = cmd_buffer->cs; 458 uint32_t data[2]; 459 uint64_t va; 460 461 va = radv_buffer_get_va(device->trace_bo); 462 463 switch (ring) { 464 case RING_GFX: 465 va += 8; 466 break; 467 case RING_COMPUTE: 468 va += 16; 469 break; 470 default: 471 assert(!"invalid ring type"); 472 } 473 474 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(device->ws, 475 cmd_buffer->cs, 6); 476 477 data[0] = (uintptr_t)pipeline; 478 data[1] = (uintptr_t)pipeline >> 32; 479 480 radv_cs_add_buffer(device->ws, cs, device->trace_bo, 8); 481 radv_emit_write_data_packet(cs, va, 2, data); 482 } 483 484 void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, 485 struct radv_descriptor_set *set, 486 unsigned idx) 487 { 488 cmd_buffer->descriptors[idx] = set; 489 if (set) 490 cmd_buffer->state.valid_descriptors |= (1u << idx); 491 else 492 cmd_buffer->state.valid_descriptors &= ~(1u << idx); 493 cmd_buffer->state.descriptors_dirty |= (1u << idx); 494 495 } 496 497 static void 498 radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer) 499 { 500 struct radv_device *device = cmd_buffer->device; 501 struct radeon_winsys_cs *cs = cmd_buffer->cs; 502 uint32_t data[MAX_SETS * 2] = {}; 503 uint64_t va; 504 unsigned i; 505 va = radv_buffer_get_va(device->trace_bo) + 24; 506 507 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(device->ws, 508 cmd_buffer->cs, 4 + MAX_SETS * 2); 509 510 for_each_bit(i, cmd_buffer->state.valid_descriptors) { 511 struct radv_descriptor_set *set = cmd_buffer->descriptors[i]; 512 data[i * 2] = (uintptr_t)set; 513 data[i * 2 + 1] = (uintptr_t)set >> 32; 514 } 515 516 radv_cs_add_buffer(device->ws, cs, device->trace_bo, 8); 517 radv_emit_write_data_packet(cs, va, MAX_SETS * 2, data); 518 } 519 520 static void 521 radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer, 522 struct radv_pipeline *pipeline) 523 { 524 radeon_set_context_reg_seq(cmd_buffer->cs, R_028780_CB_BLEND0_CONTROL, 8); 525 radeon_emit_array(cmd_buffer->cs, pipeline->graphics.blend.cb_blend_control, 526 8); 527 radeon_set_context_reg(cmd_buffer->cs, R_028808_CB_COLOR_CONTROL, pipeline->graphics.blend.cb_color_control); 528 radeon_set_context_reg(cmd_buffer->cs, R_028B70_DB_ALPHA_TO_MASK, pipeline->graphics.blend.db_alpha_to_mask); 529 530 if (cmd_buffer->device->physical_device->has_rbplus) { 531 532 radeon_set_context_reg_seq(cmd_buffer->cs, R_028760_SX_MRT0_BLEND_OPT, 8); 533 radeon_emit_array(cmd_buffer->cs, pipeline->graphics.blend.sx_mrt_blend_opt, 8); 534 535 radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); 536 radeon_emit(cmd_buffer->cs, 0); /* R_028754_SX_PS_DOWNCONVERT */ 537 radeon_emit(cmd_buffer->cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */ 538 radeon_emit(cmd_buffer->cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */ 539 } 540 } 541 542 static void 543 radv_emit_graphics_depth_stencil_state(struct radv_cmd_buffer *cmd_buffer, 544 struct radv_pipeline *pipeline) 545 { 546 struct 
radv_depth_stencil_state *ds = &pipeline->graphics.ds; 547 radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, ds->db_depth_control); 548 radeon_set_context_reg(cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL, ds->db_stencil_control); 549 550 radeon_set_context_reg(cmd_buffer->cs, R_028000_DB_RENDER_CONTROL, ds->db_render_control); 551 radeon_set_context_reg(cmd_buffer->cs, R_028010_DB_RENDER_OVERRIDE2, ds->db_render_override2); 552 } 553 554 struct ac_userdata_info * 555 radv_lookup_user_sgpr(struct radv_pipeline *pipeline, 556 gl_shader_stage stage, 557 int idx) 558 { 559 if (stage == MESA_SHADER_VERTEX) { 560 if (pipeline->shaders[MESA_SHADER_VERTEX]) 561 return &pipeline->shaders[MESA_SHADER_VERTEX]->info.user_sgprs_locs.shader_data[idx]; 562 if (pipeline->shaders[MESA_SHADER_TESS_CTRL]) 563 return &pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.user_sgprs_locs.shader_data[idx]; 564 if (pipeline->shaders[MESA_SHADER_GEOMETRY]) 565 return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.user_sgprs_locs.shader_data[idx]; 566 } else if (stage == MESA_SHADER_TESS_EVAL) { 567 if (pipeline->shaders[MESA_SHADER_TESS_EVAL]) 568 return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.user_sgprs_locs.shader_data[idx]; 569 if (pipeline->shaders[MESA_SHADER_GEOMETRY]) 570 return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.user_sgprs_locs.shader_data[idx]; 571 } 572 return &pipeline->shaders[stage]->info.user_sgprs_locs.shader_data[idx]; 573 } 574 575 static void 576 radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, 577 struct radv_pipeline *pipeline, 578 gl_shader_stage stage, 579 int idx, uint64_t va) 580 { 581 struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); 582 uint32_t base_reg = pipeline->user_data_0[stage]; 583 if (loc->sgpr_idx == -1) 584 return; 585 assert(loc->num_sgprs == 2); 586 assert(!loc->indirect); 587 radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 2); 588 radeon_emit(cmd_buffer->cs, va); 589 radeon_emit(cmd_buffer->cs, va >> 32); 590 } 591 592 static void 593 radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, 594 struct radv_pipeline *pipeline) 595 { 596 int num_samples = pipeline->graphics.ms.num_samples; 597 struct radv_multisample_state *ms = &pipeline->graphics.ms; 598 struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline; 599 600 radeon_set_context_reg_seq(cmd_buffer->cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 601 radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[0]); 602 radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[1]); 603 604 radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, ms->db_eqaa); 605 radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); 606 607 if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions) { 608 uint32_t offset; 609 struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET); 610 uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_FRAGMENT]; 611 if (loc->sgpr_idx == -1) 612 return; 613 assert(loc->num_sgprs == 1); 614 assert(!loc->indirect); 615 switch (num_samples) { 616 default: 617 offset = 0; 618 break; 619 case 2: 620 offset = 1; 621 break; 622 case 4: 623 offset = 3; 624 break; 625 case 8: 626 offset = 7; 627 break; 628 case 16: 629 offset = 15; 630 break; 631 } 632 633 radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, offset); 634 cmd_buffer->sample_positions_needed = true; 635 } 636 637 if (old_pipeline && 
num_samples == old_pipeline->graphics.ms.num_samples) 638 return; 639 640 radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2); 641 radeon_emit(cmd_buffer->cs, ms->pa_sc_line_cntl); 642 radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_config); 643 644 radeon_set_context_reg(cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0, ms->pa_sc_mode_cntl_0); 645 646 radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples); 647 648 /* GFX9: Flush DFSM when the AA mode changes. */ 649 if (cmd_buffer->device->dfsm_allowed) { 650 radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 651 radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); 652 } 653 } 654 655 static void 656 radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer, 657 struct radv_pipeline *pipeline) 658 { 659 struct radv_raster_state *raster = &pipeline->graphics.raster; 660 661 radeon_set_context_reg(cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL, 662 raster->pa_cl_clip_cntl); 663 radeon_set_context_reg(cmd_buffer->cs, R_0286D4_SPI_INTERP_CONTROL_0, 664 raster->spi_interp_control); 665 radeon_set_context_reg(cmd_buffer->cs, R_028BE4_PA_SU_VTX_CNTL, 666 raster->pa_su_vtx_cntl); 667 radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, 668 raster->pa_su_sc_mode_cntl); 669 } 670 671 static inline void 672 radv_emit_prefetch_TC_L2_async(struct radv_cmd_buffer *cmd_buffer, uint64_t va, 673 unsigned size) 674 { 675 if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) 676 si_cp_dma_prefetch(cmd_buffer, va, size); 677 } 678 679 static void 680 radv_emit_VBO_descriptors_prefetch(struct radv_cmd_buffer *cmd_buffer) 681 { 682 if (cmd_buffer->state.vb_prefetch_dirty) { 683 radv_emit_prefetch_TC_L2_async(cmd_buffer, 684 cmd_buffer->state.vb_va, 685 cmd_buffer->state.vb_size); 686 cmd_buffer->state.vb_prefetch_dirty = false; 687 } 688 } 689 690 static void 691 radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, 692 struct radv_shader_variant *shader) 693 { 694 struct radeon_winsys *ws = cmd_buffer->device->ws; 695 struct radeon_winsys_cs *cs = cmd_buffer->cs; 696 uint64_t va; 697 698 if (!shader) 699 return; 700 701 va = radv_buffer_get_va(shader->bo) + shader->bo_offset; 702 703 radv_cs_add_buffer(ws, cs, shader->bo, 8); 704 radv_emit_prefetch_TC_L2_async(cmd_buffer, va, shader->code_size); 705 } 706 707 static void 708 radv_emit_prefetch(struct radv_cmd_buffer *cmd_buffer, 709 struct radv_pipeline *pipeline) 710 { 711 radv_emit_shader_prefetch(cmd_buffer, 712 pipeline->shaders[MESA_SHADER_VERTEX]); 713 radv_emit_VBO_descriptors_prefetch(cmd_buffer); 714 radv_emit_shader_prefetch(cmd_buffer, 715 pipeline->shaders[MESA_SHADER_TESS_CTRL]); 716 radv_emit_shader_prefetch(cmd_buffer, 717 pipeline->shaders[MESA_SHADER_TESS_EVAL]); 718 radv_emit_shader_prefetch(cmd_buffer, 719 pipeline->shaders[MESA_SHADER_GEOMETRY]); 720 radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader); 721 radv_emit_shader_prefetch(cmd_buffer, 722 pipeline->shaders[MESA_SHADER_FRAGMENT]); 723 } 724 725 static void 726 radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, 727 struct radv_pipeline *pipeline, 728 struct radv_shader_variant *shader) 729 { 730 uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; 731 732 radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG, 733 pipeline->graphics.vs.spi_vs_out_config); 734 735 radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT, 736 pipeline->graphics.vs.spi_shader_pos_format); 737 738 
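/* Program the hardware-VS shader address and resource words. The address
 * is emitted as a shifted pair (va >> 8 for the low dword, va >> 40 for
 * the high dword) because the hardware takes it in 256-byte units,
 * followed by the RSRC1/RSRC2 words stored on the shader variant. The
 * same pattern is used for the ES/LS/HS/GS/PS stages below.
 */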
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4); 739 radeon_emit(cmd_buffer->cs, va >> 8); 740 radeon_emit(cmd_buffer->cs, va >> 40); 741 radeon_emit(cmd_buffer->cs, shader->rsrc1); 742 radeon_emit(cmd_buffer->cs, shader->rsrc2); 743 744 radeon_set_context_reg(cmd_buffer->cs, R_028818_PA_CL_VTE_CNTL, 745 S_028818_VTX_W0_FMT(1) | 746 S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) | 747 S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | 748 S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); 749 750 751 radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL, 752 pipeline->graphics.vs.pa_cl_vs_out_cntl); 753 754 if (cmd_buffer->device->physical_device->rad_info.chip_class <= VI) 755 radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF, 756 pipeline->graphics.vs.vgt_reuse_off); 757 } 758 759 static void 760 radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, 761 struct radv_pipeline *pipeline, 762 struct radv_shader_variant *shader) 763 { 764 uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; 765 766 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4); 767 radeon_emit(cmd_buffer->cs, va >> 8); 768 radeon_emit(cmd_buffer->cs, va >> 40); 769 radeon_emit(cmd_buffer->cs, shader->rsrc1); 770 radeon_emit(cmd_buffer->cs, shader->rsrc2); 771 } 772 773 static void 774 radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, 775 struct radv_shader_variant *shader) 776 { 777 uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; 778 uint32_t rsrc2 = shader->rsrc2; 779 780 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2); 781 radeon_emit(cmd_buffer->cs, va >> 8); 782 radeon_emit(cmd_buffer->cs, va >> 40); 783 784 rsrc2 |= S_00B52C_LDS_SIZE(cmd_buffer->state.pipeline->graphics.tess.lds_size); 785 if (cmd_buffer->device->physical_device->rad_info.chip_class == CIK && 786 cmd_buffer->device->physical_device->rad_info.family != CHIP_HAWAII) 787 radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2); 788 789 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); 790 radeon_emit(cmd_buffer->cs, shader->rsrc1); 791 radeon_emit(cmd_buffer->cs, rsrc2); 792 } 793 794 static void 795 radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, 796 struct radv_shader_variant *shader) 797 { 798 uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset; 799 800 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { 801 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2); 802 radeon_emit(cmd_buffer->cs, va >> 8); 803 radeon_emit(cmd_buffer->cs, va >> 40); 804 805 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2); 806 radeon_emit(cmd_buffer->cs, shader->rsrc1); 807 radeon_emit(cmd_buffer->cs, shader->rsrc2 | 808 S_00B42C_LDS_SIZE(cmd_buffer->state.pipeline->graphics.tess.lds_size)); 809 } else { 810 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4); 811 radeon_emit(cmd_buffer->cs, va >> 8); 812 radeon_emit(cmd_buffer->cs, va >> 40); 813 radeon_emit(cmd_buffer->cs, shader->rsrc1); 814 radeon_emit(cmd_buffer->cs, shader->rsrc2); 815 } 816 } 817 818 static void 819 radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer, 820 struct radv_pipeline *pipeline) 821 { 822 struct radv_shader_variant *vs; 823 824 radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, pipeline->graphics.vgt_primitiveid_en); 825 826 /* Skip shaders merged into HS/GS 
*/ 827 vs = pipeline->shaders[MESA_SHADER_VERTEX]; 828 if (!vs) 829 return; 830 831 if (vs->info.vs.as_ls) 832 radv_emit_hw_ls(cmd_buffer, vs); 833 else if (vs->info.vs.as_es) 834 radv_emit_hw_es(cmd_buffer, pipeline, vs); 835 else 836 radv_emit_hw_vs(cmd_buffer, pipeline, vs); 837 } 838 839 840 static void 841 radv_emit_tess_shaders(struct radv_cmd_buffer *cmd_buffer, 842 struct radv_pipeline *pipeline) 843 { 844 if (!radv_pipeline_has_tess(pipeline)) 845 return; 846 847 struct radv_shader_variant *tes, *tcs; 848 849 tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL]; 850 tes = pipeline->shaders[MESA_SHADER_TESS_EVAL]; 851 852 if (tes) { 853 if (tes->info.tes.as_es) 854 radv_emit_hw_es(cmd_buffer, pipeline, tes); 855 else 856 radv_emit_hw_vs(cmd_buffer, pipeline, tes); 857 } 858 859 radv_emit_hw_hs(cmd_buffer, tcs); 860 861 radeon_set_context_reg(cmd_buffer->cs, R_028B6C_VGT_TF_PARAM, 862 pipeline->graphics.tess.tf_param); 863 864 if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) 865 radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, 866 pipeline->graphics.tess.ls_hs_config); 867 else 868 radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 869 pipeline->graphics.tess.ls_hs_config); 870 871 struct ac_userdata_info *loc; 872 873 loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT); 874 if (loc->sgpr_idx != -1) { 875 uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_CTRL]; 876 assert(loc->num_sgprs == 4); 877 assert(!loc->indirect); 878 radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 4); 879 radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.offchip_layout); 880 radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_out_offsets); 881 radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_out_layout | 882 pipeline->graphics.tess.num_tcs_input_cp << 26); 883 radeon_emit(cmd_buffer->cs, pipeline->graphics.tess.tcs_in_layout); 884 } 885 886 loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_EVAL, AC_UD_TES_OFFCHIP_LAYOUT); 887 if (loc->sgpr_idx != -1) { 888 uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_EVAL]; 889 assert(loc->num_sgprs == 1); 890 assert(!loc->indirect); 891 892 radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 893 pipeline->graphics.tess.offchip_layout); 894 } 895 896 loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_LS_TCS_IN_LAYOUT); 897 if (loc->sgpr_idx != -1) { 898 uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_VERTEX]; 899 assert(loc->num_sgprs == 1); 900 assert(!loc->indirect); 901 902 radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 903 pipeline->graphics.tess.tcs_in_layout); 904 } 905 } 906 907 static void 908 radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer, 909 struct radv_pipeline *pipeline) 910 { 911 struct radv_shader_variant *gs; 912 uint64_t va; 913 914 radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, pipeline->graphics.vgt_gs_mode); 915 916 gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; 917 if (!gs) 918 return; 919 920 uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2; 921 922 radeon_set_context_reg_seq(cmd_buffer->cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); 923 radeon_emit(cmd_buffer->cs, gsvs_itemsize); 924 radeon_emit(cmd_buffer->cs, gsvs_itemsize); 925 radeon_emit(cmd_buffer->cs, gsvs_itemsize); 926 927 radeon_set_context_reg(cmd_buffer->cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize); 928 929 radeon_set_context_reg(cmd_buffer->cs, 
R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); 930 931 uint32_t gs_vert_itemsize = gs->info.gs.gsvs_vertex_size; 932 radeon_set_context_reg_seq(cmd_buffer->cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); 933 radeon_emit(cmd_buffer->cs, gs_vert_itemsize >> 2); 934 radeon_emit(cmd_buffer->cs, 0); 935 radeon_emit(cmd_buffer->cs, 0); 936 radeon_emit(cmd_buffer->cs, 0); 937 938 uint32_t gs_num_invocations = gs->info.gs.invocations; 939 radeon_set_context_reg(cmd_buffer->cs, R_028B90_VGT_GS_INSTANCE_CNT, 940 S_028B90_CNT(MIN2(gs_num_invocations, 127)) | 941 S_028B90_ENABLE(gs_num_invocations > 0)); 942 943 radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 944 pipeline->graphics.gs.vgt_esgs_ring_itemsize); 945 946 va = radv_buffer_get_va(gs->bo) + gs->bo_offset; 947 948 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { 949 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2); 950 radeon_emit(cmd_buffer->cs, va >> 8); 951 radeon_emit(cmd_buffer->cs, va >> 40); 952 953 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2); 954 radeon_emit(cmd_buffer->cs, gs->rsrc1); 955 radeon_emit(cmd_buffer->cs, gs->rsrc2 | 956 S_00B22C_LDS_SIZE(pipeline->graphics.gs.lds_size)); 957 958 radeon_set_context_reg(cmd_buffer->cs, R_028A44_VGT_GS_ONCHIP_CNTL, pipeline->graphics.gs.vgt_gs_onchip_cntl); 959 radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup); 960 } else { 961 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); 962 radeon_emit(cmd_buffer->cs, va >> 8); 963 radeon_emit(cmd_buffer->cs, va >> 40); 964 radeon_emit(cmd_buffer->cs, gs->rsrc1); 965 radeon_emit(cmd_buffer->cs, gs->rsrc2); 966 } 967 968 radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader); 969 970 struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY, 971 AC_UD_GS_VS_RING_STRIDE_ENTRIES); 972 if (loc->sgpr_idx != -1) { 973 uint32_t stride = gs->info.gs.max_gsvs_emit_size; 974 uint32_t num_entries = 64; 975 bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI; 976 977 if (is_vi) 978 num_entries *= stride; 979 980 stride = S_008F04_STRIDE(stride); 981 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + loc->sgpr_idx * 4, 2); 982 radeon_emit(cmd_buffer->cs, stride); 983 radeon_emit(cmd_buffer->cs, num_entries); 984 } 985 } 986 987 static void 988 radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, 989 struct radv_pipeline *pipeline) 990 { 991 struct radv_shader_variant *ps; 992 uint64_t va; 993 struct radv_blend_state *blend = &pipeline->graphics.blend; 994 assert (pipeline->shaders[MESA_SHADER_FRAGMENT]); 995 996 ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; 997 va = radv_buffer_get_va(ps->bo) + ps->bo_offset; 998 999 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4); 1000 radeon_emit(cmd_buffer->cs, va >> 8); 1001 radeon_emit(cmd_buffer->cs, va >> 40); 1002 radeon_emit(cmd_buffer->cs, ps->rsrc1); 1003 radeon_emit(cmd_buffer->cs, ps->rsrc2); 1004 1005 radeon_set_context_reg(cmd_buffer->cs, R_02880C_DB_SHADER_CONTROL, 1006 pipeline->graphics.db_shader_control); 1007 1008 radeon_set_context_reg(cmd_buffer->cs, R_0286CC_SPI_PS_INPUT_ENA, 1009 ps->config.spi_ps_input_ena); 1010 1011 radeon_set_context_reg(cmd_buffer->cs, R_0286D0_SPI_PS_INPUT_ADDR, 1012 ps->config.spi_ps_input_addr); 1013 1014 radeon_set_context_reg(cmd_buffer->cs, 
R_0286D8_SPI_PS_IN_CONTROL, 1015 S_0286D8_NUM_INTERP(ps->info.fs.num_interp)); 1016 1017 radeon_set_context_reg(cmd_buffer->cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); 1018 1019 radeon_set_context_reg(cmd_buffer->cs, R_028710_SPI_SHADER_Z_FORMAT, 1020 pipeline->graphics.shader_z_format); 1021 1022 radeon_set_context_reg(cmd_buffer->cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); 1023 1024 radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); 1025 radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); 1026 1027 if (cmd_buffer->device->dfsm_allowed) { 1028 /* optimise this? */ 1029 radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1030 radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); 1031 } 1032 1033 if (pipeline->graphics.ps_input_cntl_num) { 1034 radeon_set_context_reg_seq(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num); 1035 for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++) { 1036 radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cntl[i]); 1037 } 1038 } 1039 } 1040 1041 static void 1042 radv_emit_vgt_vertex_reuse(struct radv_cmd_buffer *cmd_buffer, 1043 struct radv_pipeline *pipeline) 1044 { 1045 struct radeon_winsys_cs *cs = cmd_buffer->cs; 1046 1047 if (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10) 1048 return; 1049 1050 radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 1051 pipeline->graphics.vtx_reuse_depth); 1052 } 1053 1054 static void 1055 radv_emit_binning_state(struct radv_cmd_buffer *cmd_buffer, 1056 struct radv_pipeline *pipeline) 1057 { 1058 struct radeon_winsys_cs *cs = cmd_buffer->cs; 1059 1060 if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9) 1061 return; 1062 1063 radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, 1064 pipeline->graphics.bin.pa_sc_binner_cntl_0); 1065 radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, 1066 pipeline->graphics.bin.db_dfsm_control); 1067 } 1068 1069 static void 1070 radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) 1071 { 1072 struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; 1073 1074 if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline) 1075 return; 1076 1077 radv_emit_graphics_depth_stencil_state(cmd_buffer, pipeline); 1078 radv_emit_graphics_blend_state(cmd_buffer, pipeline); 1079 radv_emit_graphics_raster_state(cmd_buffer, pipeline); 1080 radv_update_multisample_state(cmd_buffer, pipeline); 1081 radv_emit_vertex_shader(cmd_buffer, pipeline); 1082 radv_emit_tess_shaders(cmd_buffer, pipeline); 1083 radv_emit_geometry_shader(cmd_buffer, pipeline); 1084 radv_emit_fragment_shader(cmd_buffer, pipeline); 1085 radv_emit_vgt_vertex_reuse(cmd_buffer, pipeline); 1086 radv_emit_binning_state(cmd_buffer, pipeline); 1087 1088 cmd_buffer->scratch_size_needed = 1089 MAX2(cmd_buffer->scratch_size_needed, 1090 pipeline->max_waves * pipeline->scratch_bytes_per_wave); 1091 1092 radeon_set_context_reg(cmd_buffer->cs, R_0286E8_SPI_TMPRING_SIZE, 1093 S_0286E8_WAVES(pipeline->max_waves) | 1094 S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); 1095 1096 if (!cmd_buffer->state.emitted_pipeline || 1097 cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband != 1098 pipeline->graphics.can_use_guardband) 1099 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; 1100 1101 radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, 
pipeline->graphics.vgt_shader_stages_en); 1102 1103 if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { 1104 radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, pipeline->graphics.prim); 1105 } else { 1106 radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, pipeline->graphics.prim); 1107 } 1108 radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, pipeline->graphics.gs_out); 1109 1110 radeon_set_context_reg(cmd_buffer->cs, R_02820C_PA_SC_CLIPRECT_RULE, pipeline->graphics.pa_sc_cliprect_rule); 1111 1112 if (unlikely(cmd_buffer->device->trace_bo)) 1113 radv_save_pipeline(cmd_buffer, pipeline, RING_GFX); 1114 1115 cmd_buffer->state.emitted_pipeline = pipeline; 1116 1117 cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE; 1118 } 1119 1120 static void 1121 radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer) 1122 { 1123 si_write_viewport(cmd_buffer->cs, 0, cmd_buffer->state.dynamic.viewport.count, 1124 cmd_buffer->state.dynamic.viewport.viewports); 1125 } 1126 1127 static void 1128 radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer) 1129 { 1130 uint32_t count = cmd_buffer->state.dynamic.scissor.count; 1131 1132 /* Vega10/Raven scissor bug workaround. This must be done before VPORT 1133 * scissor registers are changed. There is also a more efficient but 1134 * more involved alternative workaround. 1135 */ 1136 if (cmd_buffer->device->physical_device->has_scissor_bug) { 1137 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; 1138 si_emit_cache_flush(cmd_buffer); 1139 } 1140 si_write_scissors(cmd_buffer->cs, 0, count, 1141 cmd_buffer->state.dynamic.scissor.scissors, 1142 cmd_buffer->state.dynamic.viewport.viewports, 1143 cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband); 1144 } 1145 1146 static void 1147 radv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer) 1148 { 1149 if (!cmd_buffer->state.dynamic.discard_rectangle.count) 1150 return; 1151 1152 radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL, 1153 cmd_buffer->state.dynamic.discard_rectangle.count * 2); 1154 for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) { 1155 VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i]; 1156 radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y)); 1157 radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) | 1158 S_028214_BR_Y(rect.offset.y + rect.extent.height)); 1159 } 1160 } 1161 1162 static void 1163 radv_emit_line_width(struct radv_cmd_buffer *cmd_buffer) 1164 { 1165 unsigned width = cmd_buffer->state.dynamic.line_width * 8; 1166 1167 radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL, 1168 S_028A08_WIDTH(CLAMP(width, 0, 0xFFF))); 1169 } 1170 1171 static void 1172 radv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer) 1173 { 1174 struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1175 1176 radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4); 1177 radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4); 1178 } 1179 1180 static void 1181 radv_emit_stencil(struct radv_cmd_buffer *cmd_buffer) 1182 { 1183 struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1184 1185 radeon_set_context_reg_seq(cmd_buffer->cs, 1186 R_028430_DB_STENCILREFMASK, 2); 1187 radeon_emit(cmd_buffer->cs, 1188 S_028430_STENCILTESTVAL(d->stencil_reference.front) | 1189 S_028430_STENCILMASK(d->stencil_compare_mask.front) | 1190 
S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) | 1191 S_028430_STENCILOPVAL(1)); 1192 radeon_emit(cmd_buffer->cs, 1193 S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) | 1194 S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) | 1195 S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) | 1196 S_028434_STENCILOPVAL_BF(1)); 1197 } 1198 1199 static void 1200 radv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer) 1201 { 1202 struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1203 1204 radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, 1205 fui(d->depth_bounds.min)); 1206 radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX, 1207 fui(d->depth_bounds.max)); 1208 } 1209 1210 static void 1211 radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer) 1212 { 1213 struct radv_raster_state *raster = &cmd_buffer->state.pipeline->graphics.raster; 1214 struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1215 unsigned slope = fui(d->depth_bias.slope * 16.0f); 1216 unsigned bias = fui(d->depth_bias.bias * cmd_buffer->state.offset_scale); 1217 1218 if (G_028814_POLY_OFFSET_FRONT_ENABLE(raster->pa_su_sc_mode_cntl)) { 1219 radeon_set_context_reg_seq(cmd_buffer->cs, 1220 R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5); 1221 radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */ 1222 radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */ 1223 radeon_emit(cmd_buffer->cs, bias); /* FRONT OFFSET */ 1224 radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */ 1225 radeon_emit(cmd_buffer->cs, bias); /* BACK OFFSET */ 1226 } 1227 } 1228 1229 static void 1230 radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, 1231 int index, 1232 struct radv_attachment_info *att, 1233 struct radv_image *image, 1234 VkImageLayout layout) 1235 { 1236 bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI; 1237 struct radv_color_buffer_info *cb = &att->cb; 1238 uint32_t cb_color_info = cb->cb_color_info; 1239 1240 if (!radv_layout_dcc_compressed(image, layout, 1241 radv_image_queue_family_mask(image, 1242 cmd_buffer->queue_family_index, 1243 cmd_buffer->queue_family_index))) { 1244 cb_color_info &= C_028C70_DCC_ENABLE; 1245 } 1246 1247 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { 1248 radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); 1249 radeon_emit(cmd_buffer->cs, cb->cb_color_base); 1250 radeon_emit(cmd_buffer->cs, cb->cb_color_base >> 32); 1251 radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2); 1252 radeon_emit(cmd_buffer->cs, cb->cb_color_view); 1253 radeon_emit(cmd_buffer->cs, cb_color_info); 1254 radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); 1255 radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); 1256 radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); 1257 radeon_emit(cmd_buffer->cs, cb->cb_color_cmask >> 32); 1258 radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); 1259 radeon_emit(cmd_buffer->cs, cb->cb_color_fmask >> 32); 1260 1261 radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2); 1262 radeon_emit(cmd_buffer->cs, cb->cb_dcc_base); 1263 radeon_emit(cmd_buffer->cs, cb->cb_dcc_base >> 32); 1264 1265 radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4, 1266 S_0287A0_EPITCH(att->attachment->image->surface.u.gfx9.surf.epitch)); 1267 } else { 1268 radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); 1269 radeon_emit(cmd_buffer->cs, cb->cb_color_base); 1270 
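/* Remaining dwords of the 11-register CB_COLOR0_BASE sequence on pre-GFX9
 * chips: pitch, slice, view, info, attrib, DCC control, CMASK base/slice
 * and FMASK base/slice. The GFX9 path above differs because it uses
 * 64-bit base addresses and a separate EPITCH register instead.
 */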
radeon_emit(cmd_buffer->cs, cb->cb_color_pitch); 1271 radeon_emit(cmd_buffer->cs, cb->cb_color_slice); 1272 radeon_emit(cmd_buffer->cs, cb->cb_color_view); 1273 radeon_emit(cmd_buffer->cs, cb_color_info); 1274 radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); 1275 radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); 1276 radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); 1277 radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice); 1278 radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); 1279 radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice); 1280 1281 if (is_vi) { /* DCC BASE */ 1282 radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); 1283 } 1284 } 1285 } 1286 1287 static void 1288 radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, 1289 struct radv_ds_buffer_info *ds, 1290 struct radv_image *image, 1291 VkImageLayout layout) 1292 { 1293 uint32_t db_z_info = ds->db_z_info; 1294 uint32_t db_stencil_info = ds->db_stencil_info; 1295 1296 if (!radv_layout_has_htile(image, layout, 1297 radv_image_queue_family_mask(image, 1298 cmd_buffer->queue_family_index, 1299 cmd_buffer->queue_family_index))) { 1300 db_z_info &= C_028040_TILE_SURFACE_ENABLE; 1301 db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); 1302 } 1303 1304 radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view); 1305 radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface); 1306 1307 1308 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { 1309 radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3); 1310 radeon_emit(cmd_buffer->cs, ds->db_htile_data_base); 1311 radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32); 1312 radeon_emit(cmd_buffer->cs, ds->db_depth_size); 1313 1314 radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10); 1315 radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */ 1316 radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */ 1317 radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */ 1318 radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); /* DB_Z_READ_BASE_HI */ 1319 radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */ 1320 radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); /* DB_STENCIL_READ_BASE_HI */ 1321 radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */ 1322 radeon_emit(cmd_buffer->cs, ds->db_z_write_base >> 32); /* DB_Z_WRITE_BASE_HI */ 1323 radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */ 1324 radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base >> 32); /* DB_STENCIL_WRITE_BASE_HI */ 1325 1326 radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2); 1327 radeon_emit(cmd_buffer->cs, ds->db_z_info2); 1328 radeon_emit(cmd_buffer->cs, ds->db_stencil_info2); 1329 } else { 1330 radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); 1331 1332 radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9); 1333 radeon_emit(cmd_buffer->cs, ds->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 1334 radeon_emit(cmd_buffer->cs, db_z_info); /* R_028040_DB_Z_INFO */ 1335 radeon_emit(cmd_buffer->cs, db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 1336 radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* R_028048_DB_Z_READ_BASE */ 1337 radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* R_02804C_DB_STENCIL_READ_BASE */ 1338 radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* 
R_028050_DB_Z_WRITE_BASE */ 1339 radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 1340 radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 1341 radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 1342 1343 } 1344 1345 radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 1346 ds->pa_su_poly_offset_db_fmt_cntl); 1347 } 1348 1349 void 1350 radv_set_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer, 1351 struct radv_image *image, 1352 VkClearDepthStencilValue ds_clear_value, 1353 VkImageAspectFlags aspects) 1354 { 1355 uint64_t va = radv_buffer_get_va(image->bo); 1356 va += image->offset + image->clear_value_offset; 1357 unsigned reg_offset = 0, reg_count = 0; 1358 1359 assert(image->surface.htile_size); 1360 1361 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 1362 ++reg_count; 1363 } else { 1364 ++reg_offset; 1365 va += 4; 1366 } 1367 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) 1368 ++reg_count; 1369 1370 radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0)); 1371 radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) | 1372 S_370_WR_CONFIRM(1) | 1373 S_370_ENGINE_SEL(V_370_PFP)); 1374 radeon_emit(cmd_buffer->cs, va); 1375 radeon_emit(cmd_buffer->cs, va >> 32); 1376 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) 1377 radeon_emit(cmd_buffer->cs, ds_clear_value.stencil); 1378 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) 1379 radeon_emit(cmd_buffer->cs, fui(ds_clear_value.depth)); 1380 1381 radeon_set_context_reg_seq(cmd_buffer->cs, R_028028_DB_STENCIL_CLEAR + 4 * reg_offset, reg_count); 1382 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) 1383 radeon_emit(cmd_buffer->cs, ds_clear_value.stencil); /* R_028028_DB_STENCIL_CLEAR */ 1384 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) 1385 radeon_emit(cmd_buffer->cs, fui(ds_clear_value.depth)); /* R_02802C_DB_DEPTH_CLEAR */ 1386 } 1387 1388 static void 1389 radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer, 1390 struct radv_image *image) 1391 { 1392 VkImageAspectFlags aspects = vk_format_aspects(image->vk_format); 1393 uint64_t va = radv_buffer_get_va(image->bo); 1394 va += image->offset + image->clear_value_offset; 1395 unsigned reg_offset = 0, reg_count = 0; 1396 1397 if (!image->surface.htile_size) 1398 return; 1399 1400 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 1401 ++reg_count; 1402 } else { 1403 ++reg_offset; 1404 va += 4; 1405 } 1406 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) 1407 ++reg_count; 1408 1409 radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0)); 1410 radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | 1411 COPY_DATA_DST_SEL(COPY_DATA_REG) | 1412 (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); 1413 radeon_emit(cmd_buffer->cs, va); 1414 radeon_emit(cmd_buffer->cs, va >> 32); 1415 radeon_emit(cmd_buffer->cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2); 1416 radeon_emit(cmd_buffer->cs, 0); 1417 1418 radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 1419 radeon_emit(cmd_buffer->cs, 0); 1420 } 1421 1422 /* 1423 * With DCC, some colors don't require CMASK elimination before being 1424 * used as a texture. This sets a predicate value to determine if the 1425 * cmask eliminate is required.
1426 */ 1427 void 1428 radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer, 1429 struct radv_image *image, 1430 bool value) 1431 { 1432 uint64_t pred_val = value; 1433 uint64_t va = radv_buffer_get_va(image->bo); 1434 va += image->offset + image->dcc_pred_offset; 1435 1436 assert(image->surface.dcc_size); 1437 1438 radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0)); 1439 radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) | 1440 S_370_WR_CONFIRM(1) | 1441 S_370_ENGINE_SEL(V_370_PFP)); 1442 radeon_emit(cmd_buffer->cs, va); 1443 radeon_emit(cmd_buffer->cs, va >> 32); 1444 radeon_emit(cmd_buffer->cs, pred_val); 1445 radeon_emit(cmd_buffer->cs, pred_val >> 32); 1446 } 1447 1448 void 1449 radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer, 1450 struct radv_image *image, 1451 int idx, 1452 uint32_t color_values[2]) 1453 { 1454 uint64_t va = radv_buffer_get_va(image->bo); 1455 va += image->offset + image->clear_value_offset; 1456 1457 assert(image->cmask.size || image->surface.dcc_size); 1458 1459 radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0)); 1460 radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) | 1461 S_370_WR_CONFIRM(1) | 1462 S_370_ENGINE_SEL(V_370_PFP)); 1463 radeon_emit(cmd_buffer->cs, va); 1464 radeon_emit(cmd_buffer->cs, va >> 32); 1465 radeon_emit(cmd_buffer->cs, color_values[0]); 1466 radeon_emit(cmd_buffer->cs, color_values[1]); 1467 1468 radeon_set_context_reg_seq(cmd_buffer->cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + idx * 0x3c, 2); 1469 radeon_emit(cmd_buffer->cs, color_values[0]); 1470 radeon_emit(cmd_buffer->cs, color_values[1]); 1471 } 1472 1473 static void 1474 radv_load_color_clear_regs(struct radv_cmd_buffer *cmd_buffer, 1475 struct radv_image *image, 1476 int idx) 1477 { 1478 uint64_t va = radv_buffer_get_va(image->bo); 1479 va += image->offset + image->clear_value_offset; 1480 1481 if (!image->cmask.size && !image->surface.dcc_size) 1482 return; 1483 1484 uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + idx * 0x3c; 1485 1486 radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); 1487 radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | 1488 COPY_DATA_DST_SEL(COPY_DATA_REG) | 1489 COPY_DATA_COUNT_SEL); 1490 radeon_emit(cmd_buffer->cs, va); 1491 radeon_emit(cmd_buffer->cs, va >> 32); 1492 radeon_emit(cmd_buffer->cs, reg >> 2); 1493 radeon_emit(cmd_buffer->cs, 0); 1494 1495 radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); 1496 radeon_emit(cmd_buffer->cs, 0); 1497 } 1498 1499 static void 1500 radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) 1501 { 1502 int i; 1503 struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; 1504 const struct radv_subpass *subpass = cmd_buffer->state.subpass; 1505 1506 /* this may happen for inherited secondary recording */ 1507 if (!framebuffer) 1508 return; 1509 1510 for (i = 0; i < 8; ++i) { 1511 if (i >= subpass->color_count || subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) { 1512 radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 1513 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 1514 continue; 1515 } 1516 1517 int idx = subpass->color_attachments[i].attachment; 1518 struct radv_attachment_info *att = &framebuffer->attachments[idx]; 1519 struct radv_image *image = att->attachment->image; 1520 VkImageLayout layout = subpass->color_attachments[i].layout; 1521 1522 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo, 8); 1523 
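/* The attachment BO was just added to the CS buffer list; now emit the CB
 * surface registers for this color attachment and load its fast-clear
 * values (CB_COLOR0_CLEAR_WORD0/1) from the image metadata.
 */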
1524 assert(att->attachment->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT); 1525 radv_emit_fb_color_state(cmd_buffer, i, att, image, layout); 1526 1527 radv_load_color_clear_regs(cmd_buffer, image, i); 1528 } 1529 1530 if(subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { 1531 int idx = subpass->depth_stencil_attachment.attachment; 1532 VkImageLayout layout = subpass->depth_stencil_attachment.layout; 1533 struct radv_attachment_info *att = &framebuffer->attachments[idx]; 1534 struct radv_image *image = att->attachment->image; 1535 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo, 8); 1536 MAYBE_UNUSED uint32_t queue_mask = radv_image_queue_family_mask(image, 1537 cmd_buffer->queue_family_index, 1538 cmd_buffer->queue_family_index); 1539 /* We currently don't support writing decompressed HTILE */ 1540 assert(radv_layout_has_htile(image, layout, queue_mask) == 1541 radv_layout_is_htile_compressed(image, layout, queue_mask)); 1542 1543 radv_emit_fb_ds_state(cmd_buffer, &att->ds, image, layout); 1544 1545 if (att->ds.offset_scale != cmd_buffer->state.offset_scale) { 1546 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; 1547 cmd_buffer->state.offset_scale = att->ds.offset_scale; 1548 } 1549 radv_load_depth_clear_regs(cmd_buffer, image); 1550 } else { 1551 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) 1552 radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2); 1553 else 1554 radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2); 1555 1556 radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */ 1557 radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ 1558 } 1559 radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 1560 S_028208_BR_X(framebuffer->width) | 1561 S_028208_BR_Y(framebuffer->height)); 1562 1563 if (cmd_buffer->device->dfsm_allowed) { 1564 radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1565 radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 1566 } 1567 1568 cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER; 1569 } 1570 1571 static void 1572 radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer) 1573 { 1574 struct radeon_winsys_cs *cs = cmd_buffer->cs; 1575 struct radv_cmd_state *state = &cmd_buffer->state; 1576 1577 if (state->index_type != state->last_index_type) { 1578 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { 1579 radeon_set_uconfig_reg_idx(cs, R_03090C_VGT_INDEX_TYPE, 1580 2, state->index_type); 1581 } else { 1582 radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); 1583 radeon_emit(cs, state->index_type); 1584 } 1585 1586 state->last_index_type = state->index_type; 1587 } 1588 1589 radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0)); 1590 radeon_emit(cs, state->index_va); 1591 radeon_emit(cs, state->index_va >> 32); 1592 1593 radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0)); 1594 radeon_emit(cs, state->max_index_count); 1595 1596 cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER; 1597 } 1598 1599 void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) 1600 { 1601 uint32_t db_count_control; 1602 1603 if(!cmd_buffer->state.active_occlusion_queries) { 1604 if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { 1605 db_count_control = 0; 1606 } else { 1607 db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); 1608 } 1609 } else { 1610 if 
(cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { 1611 db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | 1612 S_028004_SAMPLE_RATE(0) | /* TODO: set this to the number of samples of the current framebuffer */ 1613 S_028004_ZPASS_ENABLE(1) | 1614 S_028004_SLICE_EVEN_ENABLE(1) | 1615 S_028004_SLICE_ODD_ENABLE(1); 1616 } else { 1617 db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | 1618 S_028004_SAMPLE_RATE(0); /* TODO: set this to the number of samples of the current framebuffer */ 1619 } 1620 } 1621 1622 radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control); 1623 } 1624 1625 static void 1626 radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer) 1627 { 1628 if (G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.pipeline->graphics.raster.pa_cl_clip_cntl)) 1629 return; 1630 1631 if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT)) 1632 radv_emit_viewport(cmd_buffer); 1633 1634 if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT)) 1635 radv_emit_scissor(cmd_buffer); 1636 1637 if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) 1638 radv_emit_line_width(cmd_buffer); 1639 1640 if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) 1641 radv_emit_blend_constants(cmd_buffer); 1642 1643 if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | 1644 RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | 1645 RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK)) 1646 radv_emit_stencil(cmd_buffer); 1647 1648 if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) 1649 radv_emit_depth_bounds(cmd_buffer); 1650 1651 if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_PIPELINE | 1652 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) 1653 radv_emit_depth_bias(cmd_buffer); 1654 1655 if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE) 1656 radv_emit_discard_rectangle(cmd_buffer); 1657 1658 cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_DYNAMIC_ALL; 1659 } 1660 1661 static void 1662 emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer, 1663 struct radv_pipeline *pipeline, 1664 int idx, 1665 uint64_t va, 1666 gl_shader_stage stage) 1667 { 1668 struct ac_userdata_info *desc_set_loc = &pipeline->shaders[stage]->info.user_sgprs_locs.descriptor_sets[idx]; 1669 uint32_t base_reg = pipeline->user_data_0[stage]; 1670 1671 if (desc_set_loc->sgpr_idx == -1 || desc_set_loc->indirect) 1672 return; 1673 1674 assert(!desc_set_loc->indirect); 1675 assert(desc_set_loc->num_sgprs == 2); 1676 radeon_set_sh_reg_seq(cmd_buffer->cs, 1677 base_reg + desc_set_loc->sgpr_idx * 4, 2); 1678 radeon_emit(cmd_buffer->cs, va); 1679 radeon_emit(cmd_buffer->cs, va >> 32); 1680 } 1681 1682 static void 1683 radv_emit_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer, 1684 VkShaderStageFlags stages, 1685 struct radv_descriptor_set *set, 1686 unsigned idx) 1687 { 1688 if (cmd_buffer->state.pipeline) { 1689 radv_foreach_stage(stage, stages) { 1690 if (cmd_buffer->state.pipeline->shaders[stage]) 1691 emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline, 1692 idx, set->va, 1693 stage); 1694 } 1695 } 1696 1697 if (cmd_buffer->state.compute_pipeline && (stages & VK_SHADER_STAGE_COMPUTE_BIT)) 1698 emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.compute_pipeline, 1699 idx, set->va, 1700 MESA_SHADER_COMPUTE); 1701 } 1702 1703 static void 1704 radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer) 1705 { 1706 struct 
radv_descriptor_set *set = &cmd_buffer->push_descriptors.set; 1707 unsigned bo_offset; 1708 1709 if (!radv_cmd_buffer_upload_data(cmd_buffer, set->size, 32, 1710 set->mapped_ptr, 1711 &bo_offset)) 1712 return; 1713 1714 set->va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 1715 set->va += bo_offset; 1716 } 1717 1718 static void 1719 radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer) 1720 { 1721 uint32_t size = MAX_SETS * 2 * 4; 1722 uint32_t offset; 1723 void *ptr; 1724 1725 if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, 1726 256, &offset, &ptr)) 1727 return; 1728 1729 for (unsigned i = 0; i < MAX_SETS; i++) { 1730 uint32_t *uptr = ((uint32_t *)ptr) + i * 2; 1731 uint64_t set_va = 0; 1732 struct radv_descriptor_set *set = cmd_buffer->descriptors[i]; 1733 if (cmd_buffer->state.valid_descriptors & (1u << i)) 1734 set_va = set->va; 1735 uptr[0] = set_va & 0xffffffff; 1736 uptr[1] = set_va >> 32; 1737 } 1738 1739 uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 1740 va += offset; 1741 1742 if (cmd_buffer->state.pipeline) { 1743 if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX]) 1744 radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, 1745 AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 1746 1747 if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) 1748 radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_FRAGMENT, 1749 AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 1750 1751 if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) 1752 radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY, 1753 AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 1754 1755 if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) 1756 radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_CTRL, 1757 AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 1758 1759 if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) 1760 radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_EVAL, 1761 AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 1762 } 1763 1764 if (cmd_buffer->state.compute_pipeline) 1765 radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.compute_pipeline, MESA_SHADER_COMPUTE, 1766 AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 1767 } 1768 1769 static void 1770 radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, 1771 VkShaderStageFlags stages) 1772 { 1773 unsigned i; 1774 1775 if (!cmd_buffer->state.descriptors_dirty) 1776 return; 1777 1778 if (cmd_buffer->state.push_descriptors_dirty) 1779 radv_flush_push_descriptors(cmd_buffer); 1780 1781 if ((cmd_buffer->state.pipeline && cmd_buffer->state.pipeline->need_indirect_descriptor_sets) || 1782 (cmd_buffer->state.compute_pipeline && cmd_buffer->state.compute_pipeline->need_indirect_descriptor_sets)) { 1783 radv_flush_indirect_descriptor_sets(cmd_buffer); 1784 } 1785 1786 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, 1787 cmd_buffer->cs, 1788 MAX_SETS * MESA_SHADER_STAGES * 4); 1789 1790 for_each_bit(i, cmd_buffer->state.descriptors_dirty) { 1791 struct radv_descriptor_set *set = cmd_buffer->descriptors[i]; 1792 if (!(cmd_buffer->state.valid_descriptors & (1u << i))) 1793 continue; 1794 1795 radv_emit_descriptor_set_userdata(cmd_buffer, stages, set, i); 1796 } 1797 cmd_buffer->state.descriptors_dirty = 0; 1798 cmd_buffer->state.push_descriptors_dirty = false; 1799 1800 if (unlikely(cmd_buffer->device->trace_bo)) 1801 radv_save_descriptors(cmd_buffer); 1802 1803 
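	/* radeon_check_space() reserved worst-case space above (every set for
	 * every stage); the assert below verifies the actual emit stayed
	 * within that estimate.
	 */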
assert(cmd_buffer->cs->cdw <= cdw_max); 1804 } 1805 1806 static void 1807 radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, 1808 struct radv_pipeline *pipeline, 1809 VkShaderStageFlags stages) 1810 { 1811 struct radv_pipeline_layout *layout = pipeline->layout; 1812 unsigned offset; 1813 void *ptr; 1814 uint64_t va; 1815 1816 stages &= cmd_buffer->push_constant_stages; 1817 if (!stages || 1818 (!layout->push_constant_size && !layout->dynamic_offset_count)) 1819 return; 1820 1821 if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size + 1822 16 * layout->dynamic_offset_count, 1823 256, &offset, &ptr)) 1824 return; 1825 1826 memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size); 1827 memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers, 1828 16 * layout->dynamic_offset_count); 1829 1830 va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 1831 va += offset; 1832 1833 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, 1834 cmd_buffer->cs, MESA_SHADER_STAGES * 4); 1835 1836 radv_foreach_stage(stage, stages) { 1837 if (pipeline->shaders[stage]) { 1838 radv_emit_userdata_address(cmd_buffer, pipeline, stage, 1839 AC_UD_PUSH_CONSTANTS, va); 1840 } 1841 } 1842 1843 cmd_buffer->push_constant_stages &= ~stages; 1844 assert(cmd_buffer->cs->cdw <= cdw_max); 1845 } 1846 1847 static bool 1848 radv_cmd_buffer_update_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty) 1849 { 1850 if ((pipeline_is_dirty || 1851 (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) && 1852 cmd_buffer->state.pipeline->vertex_elements.count && 1853 radv_get_vertex_shader(cmd_buffer->state.pipeline)->info.info.vs.has_vertex_buffers) { 1854 struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements; 1855 unsigned vb_offset; 1856 void *vb_ptr; 1857 uint32_t i = 0; 1858 uint32_t count = velems->count; 1859 uint64_t va; 1860 1861 /* allocate some descriptor state for vertex buffers */ 1862 if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, 256, 1863 &vb_offset, &vb_ptr)) 1864 return false; 1865 1866 for (i = 0; i < count; i++) { 1867 uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; 1868 uint32_t offset; 1869 int vb = velems->binding[i]; 1870 struct radv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer; 1871 uint32_t stride = cmd_buffer->state.pipeline->binding_stride[vb]; 1872 1873 va = radv_buffer_get_va(buffer->bo); 1874 1875 offset = cmd_buffer->vertex_bindings[vb].offset + velems->offset[i]; 1876 va += offset + buffer->offset; 1877 desc[0] = va; 1878 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); 1879 if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride) 1880 desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1; 1881 else 1882 desc[2] = buffer->size - offset; 1883 desc[3] = velems->rsrc_word3[i]; 1884 } 1885 1886 va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 1887 va += vb_offset; 1888 1889 radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, 1890 AC_UD_VS_VERTEX_BUFFERS, va); 1891 1892 cmd_buffer->state.vb_va = va; 1893 cmd_buffer->state.vb_size = count * 16; 1894 cmd_buffer->state.vb_prefetch_dirty = true; 1895 } 1896 cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER; 1897 1898 return true; 1899 } 1900 1901 static bool 1902 radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty) 1903 { 1904 if 
(!radv_cmd_buffer_update_vertex_descriptors(cmd_buffer, pipeline_is_dirty)) 1905 return false; 1906 1907 radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); 1908 radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline, 1909 VK_SHADER_STAGE_ALL_GRAPHICS); 1910 1911 return true; 1912 } 1913 1914 static void 1915 radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw, 1916 bool instanced_draw, bool indirect_draw, 1917 uint32_t draw_vertex_count) 1918 { 1919 struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; 1920 struct radv_cmd_state *state = &cmd_buffer->state; 1921 struct radeon_winsys_cs *cs = cmd_buffer->cs; 1922 uint32_t ia_multi_vgt_param; 1923 int32_t primitive_reset_en; 1924 1925 /* Draw state. */ 1926 ia_multi_vgt_param = 1927 si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, 1928 indirect_draw, draw_vertex_count); 1929 1930 if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) { 1931 if (info->chip_class >= GFX9) { 1932 radeon_set_uconfig_reg_idx(cs, 1933 R_030960_IA_MULTI_VGT_PARAM, 1934 4, ia_multi_vgt_param); 1935 } else if (info->chip_class >= CIK) { 1936 radeon_set_context_reg_idx(cs, 1937 R_028AA8_IA_MULTI_VGT_PARAM, 1938 1, ia_multi_vgt_param); 1939 } else { 1940 radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1941 ia_multi_vgt_param); 1942 } 1943 state->last_ia_multi_vgt_param = ia_multi_vgt_param; 1944 } 1945 1946 /* Primitive restart. */ 1947 primitive_reset_en = 1948 indexed_draw && state->pipeline->graphics.prim_restart_enable; 1949 1950 if (primitive_reset_en != state->last_primitive_reset_en) { 1951 state->last_primitive_reset_en = primitive_reset_en; 1952 if (info->chip_class >= GFX9) { 1953 radeon_set_uconfig_reg(cs, 1954 R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, 1955 primitive_reset_en); 1956 } else { 1957 radeon_set_context_reg(cs, 1958 R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 1959 primitive_reset_en); 1960 } 1961 } 1962 1963 if (primitive_reset_en) { 1964 uint32_t primitive_reset_index = 1965 state->index_type ? 
0xffffffffu : 0xffffu; 1966 1967 if (primitive_reset_index != state->last_primitive_reset_index) { 1968 radeon_set_context_reg(cs, 1969 R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 1970 primitive_reset_index); 1971 state->last_primitive_reset_index = primitive_reset_index; 1972 } 1973 } 1974 } 1975 1976 static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, 1977 VkPipelineStageFlags src_stage_mask) 1978 { 1979 if (src_stage_mask & (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | 1980 VK_PIPELINE_STAGE_TRANSFER_BIT | 1981 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | 1982 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { 1983 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; 1984 } 1985 1986 if (src_stage_mask & (VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | 1987 VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | 1988 VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | 1989 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 1990 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | 1991 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | 1992 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | 1993 VK_PIPELINE_STAGE_TRANSFER_BIT | 1994 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | 1995 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | 1996 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { 1997 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; 1998 } else if (src_stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | 1999 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | 2000 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT)) { 2001 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; 2002 } 2003 } 2004 2005 static enum radv_cmd_flush_bits 2006 radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, 2007 VkAccessFlags src_flags) 2008 { 2009 enum radv_cmd_flush_bits flush_bits = 0; 2010 uint32_t b; 2011 for_each_bit(b, src_flags) { 2012 switch ((VkAccessFlagBits)(1 << b)) { 2013 case VK_ACCESS_SHADER_WRITE_BIT: 2014 flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; 2015 break; 2016 case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: 2017 flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 2018 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 2019 break; 2020 case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: 2021 flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | 2022 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 2023 break; 2024 case VK_ACCESS_TRANSFER_WRITE_BIT: 2025 flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 2026 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | 2027 RADV_CMD_FLAG_FLUSH_AND_INV_DB | 2028 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META | 2029 RADV_CMD_FLAG_INV_GLOBAL_L2; 2030 break; 2031 default: 2032 break; 2033 } 2034 } 2035 return flush_bits; 2036 } 2037 2038 static enum radv_cmd_flush_bits 2039 radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, 2040 VkAccessFlags dst_flags, 2041 struct radv_image *image) 2042 { 2043 enum radv_cmd_flush_bits flush_bits = 0; 2044 uint32_t b; 2045 for_each_bit(b, dst_flags) { 2046 switch ((VkAccessFlagBits)(1 << b)) { 2047 case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: 2048 case VK_ACCESS_INDEX_READ_BIT: 2049 break; 2050 case VK_ACCESS_UNIFORM_READ_BIT: 2051 flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1; 2052 break; 2053 case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: 2054 case VK_ACCESS_SHADER_READ_BIT: 2055 case VK_ACCESS_TRANSFER_READ_BIT: 2056 case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: 2057 flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | 2058 RADV_CMD_FLAG_INV_GLOBAL_L2; 2059 break; 2060 case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: 2061 /* TODO: change to image && when the image gets passed 2062 * through from the subpass. 
*/ 2063 if (!image || (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) 2064 flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 2065 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 2066 break; 2067 case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT: 2068 if (!image || (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) 2069 flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | 2070 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 2071 break; 2072 default: 2073 break; 2074 } 2075 } 2076 return flush_bits; 2077 } 2078 2079 static void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass_barrier *barrier) 2080 { 2081 cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask); 2082 radv_stage_flush(cmd_buffer, barrier->src_stage_mask); 2083 cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, 2084 NULL); 2085 } 2086 2087 static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer, 2088 VkAttachmentReference att) 2089 { 2090 unsigned idx = att.attachment; 2091 struct radv_image_view *view = cmd_buffer->state.framebuffer->attachments[idx].attachment; 2092 VkImageSubresourceRange range; 2093 range.aspectMask = 0; 2094 range.baseMipLevel = view->base_mip; 2095 range.levelCount = 1; 2096 range.baseArrayLayer = view->base_layer; 2097 range.layerCount = cmd_buffer->state.framebuffer->layers; 2098 2099 radv_handle_image_transition(cmd_buffer, 2100 view->image, 2101 cmd_buffer->state.attachments[idx].current_layout, 2102 att.layout, 0, 0, &range, 2103 cmd_buffer->state.attachments[idx].pending_clear_aspects); 2104 2105 cmd_buffer->state.attachments[idx].current_layout = att.layout; 2106 2107 2108 } 2109 2110 void 2111 radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, 2112 const struct radv_subpass *subpass, bool transitions) 2113 { 2114 if (transitions) { 2115 radv_subpass_barrier(cmd_buffer, &subpass->start_barrier); 2116 2117 for (unsigned i = 0; i < subpass->color_count; ++i) { 2118 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) 2119 radv_handle_subpass_image_transition(cmd_buffer, 2120 subpass->color_attachments[i]); 2121 } 2122 2123 for (unsigned i = 0; i < subpass->input_count; ++i) { 2124 radv_handle_subpass_image_transition(cmd_buffer, 2125 subpass->input_attachments[i]); 2126 } 2127 2128 if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { 2129 radv_handle_subpass_image_transition(cmd_buffer, 2130 subpass->depth_stencil_attachment); 2131 } 2132 } 2133 2134 cmd_buffer->state.subpass = subpass; 2135 2136 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; 2137 } 2138 2139 static VkResult 2140 radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer, 2141 struct radv_render_pass *pass, 2142 const VkRenderPassBeginInfo *info) 2143 { 2144 struct radv_cmd_state *state = &cmd_buffer->state; 2145 2146 if (pass->attachment_count == 0) { 2147 state->attachments = NULL; 2148 return VK_SUCCESS; 2149 } 2150 2151 state->attachments = vk_alloc(&cmd_buffer->pool->alloc, 2152 pass->attachment_count * 2153 sizeof(state->attachments[0]), 2154 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2155 if (state->attachments == NULL) { 2156 cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 2157 return cmd_buffer->record_result; 2158 } 2159 2160 for (uint32_t i = 0; i < pass->attachment_count; ++i) { 2161 struct radv_render_pass_attachment *att = &pass->attachments[i]; 2162 VkImageAspectFlags att_aspects = vk_format_aspects(att->format); 2163 VkImageAspectFlags 
clear_aspects = 0; 2164 2165 if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { 2166 /* color attachment */ 2167 if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { 2168 clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; 2169 } 2170 } else { 2171 /* depthstencil attachment */ 2172 if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && 2173 att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { 2174 clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; 2175 if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && 2176 att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) 2177 clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; 2178 } 2179 if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && 2180 att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { 2181 clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; 2182 } 2183 } 2184 2185 state->attachments[i].pending_clear_aspects = clear_aspects; 2186 state->attachments[i].cleared_views = 0; 2187 if (clear_aspects && info) { 2188 assert(info->clearValueCount > i); 2189 state->attachments[i].clear_value = info->pClearValues[i]; 2190 } 2191 2192 state->attachments[i].current_layout = att->initial_layout; 2193 } 2194 2195 return VK_SUCCESS; 2196 } 2197 2198 VkResult radv_AllocateCommandBuffers( 2199 VkDevice _device, 2200 const VkCommandBufferAllocateInfo *pAllocateInfo, 2201 VkCommandBuffer *pCommandBuffers) 2202 { 2203 RADV_FROM_HANDLE(radv_device, device, _device); 2204 RADV_FROM_HANDLE(radv_cmd_pool, pool, pAllocateInfo->commandPool); 2205 2206 VkResult result = VK_SUCCESS; 2207 uint32_t i; 2208 2209 for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { 2210 2211 if (!list_empty(&pool->free_cmd_buffers)) { 2212 struct radv_cmd_buffer *cmd_buffer = list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link); 2213 2214 list_del(&cmd_buffer->pool_link); 2215 list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); 2216 2217 result = radv_reset_cmd_buffer(cmd_buffer); 2218 cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 2219 cmd_buffer->level = pAllocateInfo->level; 2220 2221 pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer); 2222 } else { 2223 result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level, 2224 &pCommandBuffers[i]); 2225 } 2226 if (result != VK_SUCCESS) 2227 break; 2228 } 2229 2230 if (result != VK_SUCCESS) { 2231 radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, 2232 i, pCommandBuffers); 2233 2234 /* From the Vulkan 1.0.66 spec: 2235 * 2236 * "vkAllocateCommandBuffers can be used to create multiple 2237 * command buffers. If the creation of any of those command 2238 * buffers fails, the implementation must destroy all 2239 * successfully created command buffer objects from this 2240 * command, set all entries of the pCommandBuffers array to 2241 * NULL and return the error." 
		 */
		memset(pCommandBuffers, 0,
		       sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
	}

	return result;
}

void radv_FreeCommandBuffers(
	VkDevice device,
	VkCommandPool commandPool,
	uint32_t commandBufferCount,
	const VkCommandBuffer *pCommandBuffers)
{
	for (uint32_t i = 0; i < commandBufferCount; i++) {
		RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);

		if (cmd_buffer) {
			if (cmd_buffer->pool) {
				list_del(&cmd_buffer->pool_link);
				list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers);
			} else
				radv_cmd_buffer_destroy(cmd_buffer);
		}
	}
}

VkResult radv_ResetCommandBuffer(
	VkCommandBuffer commandBuffer,
	VkCommandBufferResetFlags flags)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	return radv_reset_cmd_buffer(cmd_buffer);
}

static void emit_gfx_buffer_state(struct radv_cmd_buffer *cmd_buffer)
{
	struct radv_device *device = cmd_buffer->device;
	if (device->gfx_init) {
		uint64_t va = radv_buffer_get_va(device->gfx_init);
		radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->gfx_init, 8);
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit(cmd_buffer->cs, device->gfx_init_size_dw & 0xffff);
	} else
		si_init_config(cmd_buffer);
}

VkResult radv_BeginCommandBuffer(
	VkCommandBuffer commandBuffer,
	const VkCommandBufferBeginInfo *pBeginInfo)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	VkResult result = VK_SUCCESS;

	if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) {
		/* If the command buffer has already been reset with
		 * vkResetCommandBuffer, there is no need to do it again.
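		 * (The status field is what tracks this; see the assignments to
		 * RADV_CMD_BUFFER_STATUS_RECORDING below and to
		 * RADV_CMD_BUFFER_STATUS_EXECUTABLE in radv_EndCommandBuffer.)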
2302 */ 2303 result = radv_reset_cmd_buffer(cmd_buffer); 2304 if (result != VK_SUCCESS) 2305 return result; 2306 } 2307 2308 memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); 2309 cmd_buffer->state.last_primitive_reset_en = -1; 2310 cmd_buffer->state.last_index_type = -1; 2311 cmd_buffer->state.last_num_instances = -1; 2312 cmd_buffer->state.last_vertex_offset = -1; 2313 cmd_buffer->state.last_first_instance = -1; 2314 cmd_buffer->usage_flags = pBeginInfo->flags; 2315 2316 /* setup initial configuration into command buffer */ 2317 if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { 2318 switch (cmd_buffer->queue_family_index) { 2319 case RADV_QUEUE_GENERAL: 2320 emit_gfx_buffer_state(cmd_buffer); 2321 break; 2322 case RADV_QUEUE_COMPUTE: 2323 si_init_compute(cmd_buffer); 2324 break; 2325 case RADV_QUEUE_TRANSFER: 2326 default: 2327 break; 2328 } 2329 } 2330 2331 if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { 2332 assert(pBeginInfo->pInheritanceInfo); 2333 cmd_buffer->state.framebuffer = radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); 2334 cmd_buffer->state.pass = radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); 2335 2336 struct radv_subpass *subpass = 2337 &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; 2338 2339 result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL); 2340 if (result != VK_SUCCESS) 2341 return result; 2342 2343 radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false); 2344 } 2345 2346 if (unlikely(cmd_buffer->device->trace_bo)) 2347 radv_cmd_buffer_trace_emit(cmd_buffer); 2348 2349 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING; 2350 2351 return result; 2352 } 2353 2354 void radv_CmdBindVertexBuffers( 2355 VkCommandBuffer commandBuffer, 2356 uint32_t firstBinding, 2357 uint32_t bindingCount, 2358 const VkBuffer* pBuffers, 2359 const VkDeviceSize* pOffsets) 2360 { 2361 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2362 struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings; 2363 bool changed = false; 2364 2365 /* We have to defer setting up vertex buffer since we need the buffer 2366 * stride from the pipeline. */ 2367 2368 assert(firstBinding + bindingCount <= MAX_VBS); 2369 for (uint32_t i = 0; i < bindingCount; i++) { 2370 uint32_t idx = firstBinding + i; 2371 2372 if (!changed && 2373 (vb[idx].buffer != radv_buffer_from_handle(pBuffers[i]) || 2374 vb[idx].offset != pOffsets[i])) { 2375 changed = true; 2376 } 2377 2378 vb[idx].buffer = radv_buffer_from_handle(pBuffers[i]); 2379 vb[idx].offset = pOffsets[i]; 2380 2381 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 2382 vb[idx].buffer->bo, 8); 2383 } 2384 2385 if (!changed) { 2386 /* No state changes. */ 2387 return; 2388 } 2389 2390 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER; 2391 } 2392 2393 void radv_CmdBindIndexBuffer( 2394 VkCommandBuffer commandBuffer, 2395 VkBuffer buffer, 2396 VkDeviceSize offset, 2397 VkIndexType indexType) 2398 { 2399 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2400 RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer); 2401 2402 if (cmd_buffer->state.index_buffer == index_buffer && 2403 cmd_buffer->state.index_offset == offset && 2404 cmd_buffer->state.index_type == indexType) { 2405 /* No state changes. 
*/ 2406 return; 2407 } 2408 2409 cmd_buffer->state.index_buffer = index_buffer; 2410 cmd_buffer->state.index_offset = offset; 2411 cmd_buffer->state.index_type = indexType; /* vk matches hw */ 2412 cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo); 2413 cmd_buffer->state.index_va += index_buffer->offset + offset; 2414 2415 int index_size_shift = cmd_buffer->state.index_type ? 2 : 1; 2416 cmd_buffer->state.max_index_count = (index_buffer->size - offset) >> index_size_shift; 2417 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; 2418 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo, 8); 2419 } 2420 2421 2422 static void 2423 radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, 2424 struct radv_descriptor_set *set, unsigned idx) 2425 { 2426 struct radeon_winsys *ws = cmd_buffer->device->ws; 2427 2428 radv_set_descriptor_set(cmd_buffer, set, idx); 2429 if (!set) 2430 return; 2431 2432 assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); 2433 2434 for (unsigned j = 0; j < set->layout->buffer_count; ++j) 2435 if (set->descriptors[j]) 2436 radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j], 7); 2437 2438 if(set->bo) 2439 radv_cs_add_buffer(ws, cmd_buffer->cs, set->bo, 8); 2440 } 2441 2442 void radv_CmdBindDescriptorSets( 2443 VkCommandBuffer commandBuffer, 2444 VkPipelineBindPoint pipelineBindPoint, 2445 VkPipelineLayout _layout, 2446 uint32_t firstSet, 2447 uint32_t descriptorSetCount, 2448 const VkDescriptorSet* pDescriptorSets, 2449 uint32_t dynamicOffsetCount, 2450 const uint32_t* pDynamicOffsets) 2451 { 2452 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2453 RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); 2454 unsigned dyn_idx = 0; 2455 2456 for (unsigned i = 0; i < descriptorSetCount; ++i) { 2457 unsigned idx = i + firstSet; 2458 RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]); 2459 radv_bind_descriptor_set(cmd_buffer, set, idx); 2460 2461 for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) { 2462 unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start; 2463 uint32_t *dst = cmd_buffer->dynamic_buffers + idx * 4; 2464 assert(dyn_idx < dynamicOffsetCount); 2465 2466 struct radv_descriptor_range *range = set->dynamic_descriptors + j; 2467 uint64_t va = range->va + pDynamicOffsets[dyn_idx]; 2468 dst[0] = va; 2469 dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); 2470 dst[2] = range->size; 2471 dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 2472 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 2473 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 2474 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 2475 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 2476 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 2477 cmd_buffer->push_constant_stages |= 2478 set->layout->dynamic_shader_stages; 2479 } 2480 } 2481 } 2482 2483 static bool radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, 2484 struct radv_descriptor_set *set, 2485 struct radv_descriptor_set_layout *layout) 2486 { 2487 set->size = layout->size; 2488 set->layout = layout; 2489 2490 if (cmd_buffer->push_descriptors.capacity < set->size) { 2491 size_t new_size = MAX2(set->size, 1024); 2492 new_size = MAX2(new_size, 2 * cmd_buffer->push_descriptors.capacity); 2493 new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS); 2494 2495 free(set->mapped_ptr); 2496 set->mapped_ptr = malloc(new_size); 2497 2498 if (!set->mapped_ptr) { 2499 cmd_buffer->push_descriptors.capacity = 0; 2500 
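			/* The vkCmdPushDescriptorSet* entry points return void, so
			 * the failure is saved in record_result and reported to the
			 * application by vkEndCommandBuffer().
			 */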
cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 2501 return false; 2502 } 2503 2504 cmd_buffer->push_descriptors.capacity = new_size; 2505 } 2506 2507 return true; 2508 } 2509 2510 void radv_meta_push_descriptor_set( 2511 struct radv_cmd_buffer* cmd_buffer, 2512 VkPipelineBindPoint pipelineBindPoint, 2513 VkPipelineLayout _layout, 2514 uint32_t set, 2515 uint32_t descriptorWriteCount, 2516 const VkWriteDescriptorSet* pDescriptorWrites) 2517 { 2518 RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); 2519 struct radv_descriptor_set *push_set = &cmd_buffer->meta_push_descriptors; 2520 unsigned bo_offset; 2521 2522 assert(set == 0); 2523 assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); 2524 2525 push_set->size = layout->set[set].layout->size; 2526 push_set->layout = layout->set[set].layout; 2527 2528 if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->size, 32, 2529 &bo_offset, 2530 (void**) &push_set->mapped_ptr)) 2531 return; 2532 2533 push_set->va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 2534 push_set->va += bo_offset; 2535 2536 radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer, 2537 radv_descriptor_set_to_handle(push_set), 2538 descriptorWriteCount, pDescriptorWrites, 0, NULL); 2539 2540 radv_set_descriptor_set(cmd_buffer, push_set, set); 2541 } 2542 2543 void radv_CmdPushDescriptorSetKHR( 2544 VkCommandBuffer commandBuffer, 2545 VkPipelineBindPoint pipelineBindPoint, 2546 VkPipelineLayout _layout, 2547 uint32_t set, 2548 uint32_t descriptorWriteCount, 2549 const VkWriteDescriptorSet* pDescriptorWrites) 2550 { 2551 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2552 RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); 2553 struct radv_descriptor_set *push_set = &cmd_buffer->push_descriptors.set; 2554 2555 assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); 2556 2557 if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout)) 2558 return; 2559 2560 radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer, 2561 radv_descriptor_set_to_handle(push_set), 2562 descriptorWriteCount, pDescriptorWrites, 0, NULL); 2563 2564 radv_set_descriptor_set(cmd_buffer, push_set, set); 2565 cmd_buffer->state.push_descriptors_dirty = true; 2566 } 2567 2568 void radv_CmdPushDescriptorSetWithTemplateKHR( 2569 VkCommandBuffer commandBuffer, 2570 VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, 2571 VkPipelineLayout _layout, 2572 uint32_t set, 2573 const void* pData) 2574 { 2575 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2576 RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); 2577 struct radv_descriptor_set *push_set = &cmd_buffer->push_descriptors.set; 2578 2579 assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); 2580 2581 if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout)) 2582 return; 2583 2584 radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set, 2585 descriptorUpdateTemplate, pData); 2586 2587 radv_set_descriptor_set(cmd_buffer, push_set, set); 2588 cmd_buffer->state.push_descriptors_dirty = true; 2589 } 2590 2591 void radv_CmdPushConstants(VkCommandBuffer commandBuffer, 2592 VkPipelineLayout layout, 2593 VkShaderStageFlags stageFlags, 2594 uint32_t offset, 2595 uint32_t size, 2596 const void* pValues) 2597 { 2598 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2599 
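	/* Only cache the data CPU-side here; radv_flush_constants() uploads it
	 * and emits the user SGPR pointers for the dirtied stages when the
	 * next draw or dispatch is recorded.
	 */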
memcpy(cmd_buffer->push_constants + offset, pValues, size); 2600 cmd_buffer->push_constant_stages |= stageFlags; 2601 } 2602 2603 VkResult radv_EndCommandBuffer( 2604 VkCommandBuffer commandBuffer) 2605 { 2606 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2607 2608 if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) { 2609 if (cmd_buffer->device->physical_device->rad_info.chip_class == SI) 2610 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; 2611 si_emit_cache_flush(cmd_buffer); 2612 } 2613 2614 vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); 2615 2616 if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs)) 2617 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); 2618 2619 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE; 2620 2621 return cmd_buffer->record_result; 2622 } 2623 2624 static void 2625 radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) 2626 { 2627 struct radv_shader_variant *compute_shader; 2628 struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; 2629 struct radv_device *device = cmd_buffer->device; 2630 unsigned compute_resource_limits; 2631 unsigned waves_per_threadgroup; 2632 uint64_t va; 2633 2634 if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline) 2635 return; 2636 2637 cmd_buffer->state.emitted_compute_pipeline = pipeline; 2638 2639 compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; 2640 va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset; 2641 2642 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, 2643 cmd_buffer->cs, 19); 2644 2645 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B830_COMPUTE_PGM_LO, 2); 2646 radeon_emit(cmd_buffer->cs, va >> 8); 2647 radeon_emit(cmd_buffer->cs, va >> 40); 2648 2649 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B848_COMPUTE_PGM_RSRC1, 2); 2650 radeon_emit(cmd_buffer->cs, compute_shader->rsrc1); 2651 radeon_emit(cmd_buffer->cs, compute_shader->rsrc2); 2652 2653 2654 cmd_buffer->compute_scratch_size_needed = 2655 MAX2(cmd_buffer->compute_scratch_size_needed, 2656 pipeline->max_waves * pipeline->scratch_bytes_per_wave); 2657 2658 /* change these once we have scratch support */ 2659 radeon_set_sh_reg(cmd_buffer->cs, R_00B860_COMPUTE_TMPRING_SIZE, 2660 S_00B860_WAVES(pipeline->max_waves) | 2661 S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); 2662 2663 /* Calculate best compute resource limits. */ 2664 waves_per_threadgroup = 2665 DIV_ROUND_UP(compute_shader->info.cs.block_size[0] * 2666 compute_shader->info.cs.block_size[1] * 2667 compute_shader->info.cs.block_size[2], 64); 2668 compute_resource_limits = 2669 S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); 2670 2671 if (device->physical_device->rad_info.chip_class >= CIK) { 2672 unsigned num_cu_per_se = 2673 device->physical_device->rad_info.num_good_compute_units / 2674 device->physical_device->rad_info.max_se; 2675 2676 /* Force even distribution on all SIMDs in CU if the workgroup 2677 * size is 64. This has shown some good improvements if # of 2678 * CUs per SE is not a multiple of 4. 
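		 * For example (a sketch of the arithmetic above): an 8x8x1
		 * workgroup has 64 invocations, so waves_per_threadgroup =
		 * DIV_ROUND_UP(64, 64) = 1; on parts where num_cu_per_se is not
		 * a multiple of 4 that is exactly the case the FORCE_SIMD_DIST
		 * bit below is meant to help.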
2679 */ 2680 if (num_cu_per_se % 4 && waves_per_threadgroup == 1) 2681 compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1); 2682 } 2683 2684 radeon_set_sh_reg(cmd_buffer->cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 2685 compute_resource_limits); 2686 2687 radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); 2688 radeon_emit(cmd_buffer->cs, 2689 S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0])); 2690 radeon_emit(cmd_buffer->cs, 2691 S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1])); 2692 radeon_emit(cmd_buffer->cs, 2693 S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2])); 2694 2695 assert(cmd_buffer->cs->cdw <= cdw_max); 2696 2697 if (unlikely(cmd_buffer->device->trace_bo)) 2698 radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE); 2699 } 2700 2701 static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer) 2702 { 2703 cmd_buffer->state.descriptors_dirty |= cmd_buffer->state.valid_descriptors; 2704 } 2705 2706 void radv_CmdBindPipeline( 2707 VkCommandBuffer commandBuffer, 2708 VkPipelineBindPoint pipelineBindPoint, 2709 VkPipeline _pipeline) 2710 { 2711 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2712 RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); 2713 2714 switch (pipelineBindPoint) { 2715 case VK_PIPELINE_BIND_POINT_COMPUTE: 2716 if (cmd_buffer->state.compute_pipeline == pipeline) 2717 return; 2718 radv_mark_descriptor_sets_dirty(cmd_buffer); 2719 2720 cmd_buffer->state.compute_pipeline = pipeline; 2721 cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT; 2722 break; 2723 case VK_PIPELINE_BIND_POINT_GRAPHICS: 2724 if (cmd_buffer->state.pipeline == pipeline) 2725 return; 2726 radv_mark_descriptor_sets_dirty(cmd_buffer); 2727 2728 cmd_buffer->state.pipeline = pipeline; 2729 if (!pipeline) 2730 break; 2731 2732 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE; 2733 cmd_buffer->push_constant_stages |= pipeline->active_stages; 2734 2735 /* the new vertex shader might not have the same user regs */ 2736 cmd_buffer->state.last_first_instance = -1; 2737 cmd_buffer->state.last_vertex_offset = -1; 2738 2739 radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state); 2740 2741 if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed) 2742 cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size; 2743 if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed) 2744 cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size; 2745 2746 if (radv_pipeline_has_tess(pipeline)) 2747 cmd_buffer->tess_rings_needed = true; 2748 2749 if (radv_pipeline_has_gs(pipeline)) { 2750 struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY, 2751 AC_UD_SCRATCH_RING_OFFSETS); 2752 if (cmd_buffer->ring_offsets_idx == -1) 2753 cmd_buffer->ring_offsets_idx = loc->sgpr_idx; 2754 else if (loc->sgpr_idx != -1) 2755 assert(loc->sgpr_idx == cmd_buffer->ring_offsets_idx); 2756 } 2757 break; 2758 default: 2759 assert(!"invalid bind point"); 2760 break; 2761 } 2762 } 2763 2764 void radv_CmdSetViewport( 2765 VkCommandBuffer commandBuffer, 2766 uint32_t firstViewport, 2767 uint32_t viewportCount, 2768 const VkViewport* pViewports) 2769 { 2770 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2771 struct radv_cmd_state *state = &cmd_buffer->state; 2772 MAYBE_UNUSED const uint32_t total_count = firstViewport + viewportCount; 2773 2774 assert(firstViewport < MAX_VIEWPORTS); 2775 assert(total_count >= 1 
&& total_count <= MAX_VIEWPORTS); 2776 2777 if (cmd_buffer->device->physical_device->has_scissor_bug) { 2778 /* Try to skip unnecessary PS partial flushes when the viewports 2779 * don't change. 2780 */ 2781 if (!(state->dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | 2782 RADV_CMD_DIRTY_DYNAMIC_SCISSOR)) && 2783 !memcmp(state->dynamic.viewport.viewports + firstViewport, 2784 pViewports, viewportCount * sizeof(*pViewports))) { 2785 return; 2786 } 2787 } 2788 2789 memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports, 2790 viewportCount * sizeof(*pViewports)); 2791 2792 state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT; 2793 } 2794 2795 void radv_CmdSetScissor( 2796 VkCommandBuffer commandBuffer, 2797 uint32_t firstScissor, 2798 uint32_t scissorCount, 2799 const VkRect2D* pScissors) 2800 { 2801 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2802 struct radv_cmd_state *state = &cmd_buffer->state; 2803 MAYBE_UNUSED const uint32_t total_count = firstScissor + scissorCount; 2804 2805 assert(firstScissor < MAX_SCISSORS); 2806 assert(total_count >= 1 && total_count <= MAX_SCISSORS); 2807 2808 if (cmd_buffer->device->physical_device->has_scissor_bug) { 2809 /* Try to skip unnecessary PS partial flushes when the scissors 2810 * don't change. 2811 */ 2812 if (!(state->dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | 2813 RADV_CMD_DIRTY_DYNAMIC_SCISSOR)) && 2814 !memcmp(state->dynamic.scissor.scissors + firstScissor, 2815 pScissors, scissorCount * sizeof(*pScissors))) { 2816 return; 2817 } 2818 } 2819 2820 memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors, 2821 scissorCount * sizeof(*pScissors)); 2822 2823 state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; 2824 } 2825 2826 void radv_CmdSetLineWidth( 2827 VkCommandBuffer commandBuffer, 2828 float lineWidth) 2829 { 2830 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2831 cmd_buffer->state.dynamic.line_width = lineWidth; 2832 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; 2833 } 2834 2835 void radv_CmdSetDepthBias( 2836 VkCommandBuffer commandBuffer, 2837 float depthBiasConstantFactor, 2838 float depthBiasClamp, 2839 float depthBiasSlopeFactor) 2840 { 2841 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2842 2843 cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; 2844 cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; 2845 cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; 2846 2847 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; 2848 } 2849 2850 void radv_CmdSetBlendConstants( 2851 VkCommandBuffer commandBuffer, 2852 const float blendConstants[4]) 2853 { 2854 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2855 2856 memcpy(cmd_buffer->state.dynamic.blend_constants, 2857 blendConstants, sizeof(float) * 4); 2858 2859 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; 2860 } 2861 2862 void radv_CmdSetDepthBounds( 2863 VkCommandBuffer commandBuffer, 2864 float minDepthBounds, 2865 float maxDepthBounds) 2866 { 2867 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2868 2869 cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; 2870 cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; 2871 2872 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; 2873 } 2874 2875 void radv_CmdSetStencilCompareMask( 2876 VkCommandBuffer commandBuffer, 2877 VkStencilFaceFlags faceMask, 2878 uint32_t compareMask) 2879 { 2880 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 
commandBuffer); 2881 2882 if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 2883 cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; 2884 if (faceMask & VK_STENCIL_FACE_BACK_BIT) 2885 cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; 2886 2887 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; 2888 } 2889 2890 void radv_CmdSetStencilWriteMask( 2891 VkCommandBuffer commandBuffer, 2892 VkStencilFaceFlags faceMask, 2893 uint32_t writeMask) 2894 { 2895 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2896 2897 if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 2898 cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; 2899 if (faceMask & VK_STENCIL_FACE_BACK_BIT) 2900 cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; 2901 2902 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; 2903 } 2904 2905 void radv_CmdSetStencilReference( 2906 VkCommandBuffer commandBuffer, 2907 VkStencilFaceFlags faceMask, 2908 uint32_t reference) 2909 { 2910 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2911 2912 if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 2913 cmd_buffer->state.dynamic.stencil_reference.front = reference; 2914 if (faceMask & VK_STENCIL_FACE_BACK_BIT) 2915 cmd_buffer->state.dynamic.stencil_reference.back = reference; 2916 2917 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; 2918 } 2919 2920 void radv_CmdSetDiscardRectangleEXT( 2921 VkCommandBuffer commandBuffer, 2922 uint32_t firstDiscardRectangle, 2923 uint32_t discardRectangleCount, 2924 const VkRect2D* pDiscardRectangles) 2925 { 2926 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2927 struct radv_cmd_state *state = &cmd_buffer->state; 2928 MAYBE_UNUSED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount; 2929 2930 assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES); 2931 assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES); 2932 2933 typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle], 2934 pDiscardRectangles, discardRectangleCount); 2935 2936 state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE; 2937 } 2938 2939 void radv_CmdExecuteCommands( 2940 VkCommandBuffer commandBuffer, 2941 uint32_t commandBufferCount, 2942 const VkCommandBuffer* pCmdBuffers) 2943 { 2944 RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer); 2945 2946 assert(commandBufferCount > 0); 2947 2948 /* Emit pending flushes on primary prior to executing secondary */ 2949 si_emit_cache_flush(primary); 2950 2951 for (uint32_t i = 0; i < commandBufferCount; i++) { 2952 RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]); 2953 2954 primary->scratch_size_needed = MAX2(primary->scratch_size_needed, 2955 secondary->scratch_size_needed); 2956 primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed, 2957 secondary->compute_scratch_size_needed); 2958 2959 if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed) 2960 primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed; 2961 if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed) 2962 primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed; 2963 if (secondary->tess_rings_needed) 2964 primary->tess_rings_needed = true; 2965 if (secondary->sample_positions_needed) 2966 primary->sample_positions_needed = true; 2967 2968 if (secondary->ring_offsets_idx != -1) { 2969 if (primary->ring_offsets_idx == -1) 2970 primary->ring_offsets_idx = 
					secondary->ring_offsets_idx;
			else
				assert(secondary->ring_offsets_idx == primary->ring_offsets_idx);
		}
		primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);

		/* When the secondary command buffer is compute only we don't
		 * need to re-emit the current graphics pipeline.
		 */
		if (secondary->state.emitted_pipeline) {
			primary->state.emitted_pipeline =
				secondary->state.emitted_pipeline;
		}

		/* When the secondary command buffer is graphics only we don't
		 * need to re-emit the current compute pipeline.
		 */
		if (secondary->state.emitted_compute_pipeline) {
			primary->state.emitted_compute_pipeline =
				secondary->state.emitted_compute_pipeline;
		}

		/* Only re-emit the draw packets when needed. */
		if (secondary->state.last_primitive_reset_en != -1) {
			primary->state.last_primitive_reset_en =
				secondary->state.last_primitive_reset_en;
		}

		if (secondary->state.last_primitive_reset_index) {
			primary->state.last_primitive_reset_index =
				secondary->state.last_primitive_reset_index;
		}

		if (secondary->state.last_ia_multi_vgt_param) {
			primary->state.last_ia_multi_vgt_param =
				secondary->state.last_ia_multi_vgt_param;
		}

		primary->state.last_first_instance = secondary->state.last_first_instance;
		primary->state.last_num_instances = secondary->state.last_num_instances;
		primary->state.last_vertex_offset = secondary->state.last_vertex_offset;

		if (secondary->state.last_index_type != -1) {
			primary->state.last_index_type =
				secondary->state.last_index_type;
		}
	}

	/* After executing commands from secondary buffers we have to dirty
	 * some states.
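	 * The secondary may have bound its own pipelines, index buffer and
	 * dynamic state, so all of that is marked dirty and re-emitted on the
	 * next draw recorded into the primary.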
3021 */ 3022 primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | 3023 RADV_CMD_DIRTY_INDEX_BUFFER | 3024 RADV_CMD_DIRTY_DYNAMIC_ALL; 3025 radv_mark_descriptor_sets_dirty(primary); 3026 } 3027 3028 VkResult radv_CreateCommandPool( 3029 VkDevice _device, 3030 const VkCommandPoolCreateInfo* pCreateInfo, 3031 const VkAllocationCallbacks* pAllocator, 3032 VkCommandPool* pCmdPool) 3033 { 3034 RADV_FROM_HANDLE(radv_device, device, _device); 3035 struct radv_cmd_pool *pool; 3036 3037 pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, 3038 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 3039 if (pool == NULL) 3040 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 3041 3042 if (pAllocator) 3043 pool->alloc = *pAllocator; 3044 else 3045 pool->alloc = device->alloc; 3046 3047 list_inithead(&pool->cmd_buffers); 3048 list_inithead(&pool->free_cmd_buffers); 3049 3050 pool->queue_family_index = pCreateInfo->queueFamilyIndex; 3051 3052 *pCmdPool = radv_cmd_pool_to_handle(pool); 3053 3054 return VK_SUCCESS; 3055 3056 } 3057 3058 void radv_DestroyCommandPool( 3059 VkDevice _device, 3060 VkCommandPool commandPool, 3061 const VkAllocationCallbacks* pAllocator) 3062 { 3063 RADV_FROM_HANDLE(radv_device, device, _device); 3064 RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 3065 3066 if (!pool) 3067 return; 3068 3069 list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, 3070 &pool->cmd_buffers, pool_link) { 3071 radv_cmd_buffer_destroy(cmd_buffer); 3072 } 3073 3074 list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, 3075 &pool->free_cmd_buffers, pool_link) { 3076 radv_cmd_buffer_destroy(cmd_buffer); 3077 } 3078 3079 vk_free2(&device->alloc, pAllocator, pool); 3080 } 3081 3082 VkResult radv_ResetCommandPool( 3083 VkDevice device, 3084 VkCommandPool commandPool, 3085 VkCommandPoolResetFlags flags) 3086 { 3087 RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 3088 VkResult result; 3089 3090 list_for_each_entry(struct radv_cmd_buffer, cmd_buffer, 3091 &pool->cmd_buffers, pool_link) { 3092 result = radv_reset_cmd_buffer(cmd_buffer); 3093 if (result != VK_SUCCESS) 3094 return result; 3095 } 3096 3097 return VK_SUCCESS; 3098 } 3099 3100 void radv_TrimCommandPoolKHR( 3101 VkDevice device, 3102 VkCommandPool commandPool, 3103 VkCommandPoolTrimFlagsKHR flags) 3104 { 3105 RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 3106 3107 if (!pool) 3108 return; 3109 3110 list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, 3111 &pool->free_cmd_buffers, pool_link) { 3112 radv_cmd_buffer_destroy(cmd_buffer); 3113 } 3114 } 3115 3116 void radv_CmdBeginRenderPass( 3117 VkCommandBuffer commandBuffer, 3118 const VkRenderPassBeginInfo* pRenderPassBegin, 3119 VkSubpassContents contents) 3120 { 3121 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3122 RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass); 3123 RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); 3124 3125 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, 3126 cmd_buffer->cs, 2048); 3127 MAYBE_UNUSED VkResult result; 3128 3129 cmd_buffer->state.framebuffer = framebuffer; 3130 cmd_buffer->state.pass = pass; 3131 cmd_buffer->state.render_area = pRenderPassBegin->renderArea; 3132 3133 result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin); 3134 if (result != VK_SUCCESS) 3135 return; 3136 3137 radv_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses, true); 3138 assert(cmd_buffer->cs->cdw <= cdw_max); 3139 3140 
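	/* radv_cmd_state_setup_attachments() recorded which aspects need a
	 * LOAD_OP_CLEAR in pending_clear_aspects; those clears are performed
	 * by radv_cmd_buffer_clear_subpass() below, after the first subpass
	 * transitions have been emitted.
	 */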
radv_cmd_buffer_clear_subpass(cmd_buffer); 3141 } 3142 3143 void radv_CmdNextSubpass( 3144 VkCommandBuffer commandBuffer, 3145 VkSubpassContents contents) 3146 { 3147 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3148 3149 radv_cmd_buffer_resolve_subpass(cmd_buffer); 3150 3151 radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 3152 2048); 3153 3154 radv_cmd_buffer_set_subpass(cmd_buffer, cmd_buffer->state.subpass + 1, true); 3155 radv_cmd_buffer_clear_subpass(cmd_buffer); 3156 } 3157 3158 static void radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index) 3159 { 3160 struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; 3161 for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) { 3162 if (!pipeline->shaders[stage]) 3163 continue; 3164 struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX); 3165 if (loc->sgpr_idx == -1) 3166 continue; 3167 uint32_t base_reg = pipeline->user_data_0[stage]; 3168 radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index); 3169 3170 } 3171 if (pipeline->gs_copy_shader) { 3172 struct ac_userdata_info *loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX]; 3173 if (loc->sgpr_idx != -1) { 3174 uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0; 3175 radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index); 3176 } 3177 } 3178 } 3179 3180 static void 3181 radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, 3182 uint32_t vertex_count) 3183 { 3184 radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating)); 3185 radeon_emit(cmd_buffer->cs, vertex_count); 3186 radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | 3187 S_0287F0_USE_OPAQUE(0)); 3188 } 3189 3190 static void 3191 radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, 3192 uint64_t index_va, 3193 uint32_t index_count) 3194 { 3195 radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, false)); 3196 radeon_emit(cmd_buffer->cs, cmd_buffer->state.max_index_count); 3197 radeon_emit(cmd_buffer->cs, index_va); 3198 radeon_emit(cmd_buffer->cs, index_va >> 32); 3199 radeon_emit(cmd_buffer->cs, index_count); 3200 radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA); 3201 } 3202 3203 static void 3204 radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, 3205 bool indexed, 3206 uint32_t draw_count, 3207 uint64_t count_va, 3208 uint32_t stride) 3209 { 3210 struct radeon_winsys_cs *cs = cmd_buffer->cs; 3211 unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA 3212 : V_0287F0_DI_SRC_SEL_AUTO_INDEX; 3213 bool draw_id_enable = radv_get_vertex_shader(cmd_buffer->state.pipeline)->info.info.vs.needs_draw_id; 3214 uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr; 3215 assert(base_reg); 3216 3217 /* just reset draw state for vertex data */ 3218 cmd_buffer->state.last_first_instance = -1; 3219 cmd_buffer->state.last_num_instances = -1; 3220 cmd_buffer->state.last_vertex_offset = -1; 3221 3222 if (draw_count == 1 && !count_va && !draw_id_enable) { 3223 radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : 3224 PKT3_DRAW_INDIRECT, 3, false)); 3225 radeon_emit(cs, 0); 3226 radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2); 3227 radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2); 3228 radeon_emit(cs, di_src_sel); 3229 } else { 3230 radeon_emit(cs, PKT3(indexed ? 
PKT3_DRAW_INDEX_INDIRECT_MULTI : 3231 PKT3_DRAW_INDIRECT_MULTI, 3232 8, false)); 3233 radeon_emit(cs, 0); 3234 radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2); 3235 radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2); 3236 radeon_emit(cs, (((base_reg + 8) - SI_SH_REG_OFFSET) >> 2) | 3237 S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | 3238 S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); 3239 radeon_emit(cs, draw_count); /* count */ 3240 radeon_emit(cs, count_va); /* count_addr */ 3241 radeon_emit(cs, count_va >> 32); 3242 radeon_emit(cs, stride); /* stride */ 3243 radeon_emit(cs, di_src_sel); 3244 } 3245 } 3246 3247 struct radv_draw_info { 3248 /** 3249 * Number of vertices. 3250 */ 3251 uint32_t count; 3252 3253 /** 3254 * Index of the first vertex. 3255 */ 3256 int32_t vertex_offset; 3257 3258 /** 3259 * First instance id. 3260 */ 3261 uint32_t first_instance; 3262 3263 /** 3264 * Number of instances. 3265 */ 3266 uint32_t instance_count; 3267 3268 /** 3269 * First index (indexed draws only). 3270 */ 3271 uint32_t first_index; 3272 3273 /** 3274 * Whether it's an indexed draw. 3275 */ 3276 bool indexed; 3277 3278 /** 3279 * Indirect draw parameters resource. 3280 */ 3281 struct radv_buffer *indirect; 3282 uint64_t indirect_offset; 3283 uint32_t stride; 3284 3285 /** 3286 * Draw count parameters resource. 3287 */ 3288 struct radv_buffer *count_buffer; 3289 uint64_t count_buffer_offset; 3290 }; 3291 3292 static void 3293 radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, 3294 const struct radv_draw_info *info) 3295 { 3296 struct radv_cmd_state *state = &cmd_buffer->state; 3297 struct radeon_winsys *ws = cmd_buffer->device->ws; 3298 struct radeon_winsys_cs *cs = cmd_buffer->cs; 3299 3300 if (info->indirect) { 3301 uint64_t va = radv_buffer_get_va(info->indirect->bo); 3302 uint64_t count_va = 0; 3303 3304 va += info->indirect->offset + info->indirect_offset; 3305 3306 radv_cs_add_buffer(ws, cs, info->indirect->bo, 8); 3307 3308 radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); 3309 radeon_emit(cs, 1); 3310 radeon_emit(cs, va); 3311 radeon_emit(cs, va >> 32); 3312 3313 if (info->count_buffer) { 3314 count_va = radv_buffer_get_va(info->count_buffer->bo); 3315 count_va += info->count_buffer->offset + 3316 info->count_buffer_offset; 3317 3318 radv_cs_add_buffer(ws, cs, info->count_buffer->bo, 8); 3319 } 3320 3321 if (!state->subpass->view_mask) { 3322 radv_cs_emit_indirect_draw_packet(cmd_buffer, 3323 info->indexed, 3324 info->count, 3325 count_va, 3326 info->stride); 3327 } else { 3328 unsigned i; 3329 for_each_bit(i, state->subpass->view_mask) { 3330 radv_emit_view_index(cmd_buffer, i); 3331 3332 radv_cs_emit_indirect_draw_packet(cmd_buffer, 3333 info->indexed, 3334 info->count, 3335 count_va, 3336 info->stride); 3337 } 3338 } 3339 } else { 3340 assert(state->pipeline->graphics.vtx_base_sgpr); 3341 3342 if (info->vertex_offset != state->last_vertex_offset || 3343 info->first_instance != state->last_first_instance) { 3344 radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr, 3345 state->pipeline->graphics.vtx_emit_num); 3346 3347 radeon_emit(cs, info->vertex_offset); 3348 radeon_emit(cs, info->first_instance); 3349 if (state->pipeline->graphics.vtx_emit_num == 3) 3350 radeon_emit(cs, 0); 3351 state->last_first_instance = info->first_instance; 3352 state->last_vertex_offset = info->vertex_offset; 3353 } 3354 3355 if (state->last_num_instances != info->instance_count) { 3356 radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false)); 3357 radeon_emit(cs, info->instance_count); 3358 
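			/* Cache the emitted value so the NUM_INSTANCES
			 * packet can be skipped when the instance count does
			 * not change between consecutive draws.
			 */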
state->last_num_instances = info->instance_count; 3359 } 3360 3361 if (info->indexed) { 3362 int index_size = state->index_type ? 4 : 2; 3363 uint64_t index_va; 3364 3365 index_va = state->index_va; 3366 index_va += info->first_index * index_size; 3367 3368 if (!state->subpass->view_mask) { 3369 radv_cs_emit_draw_indexed_packet(cmd_buffer, 3370 index_va, 3371 info->count); 3372 } else { 3373 unsigned i; 3374 for_each_bit(i, state->subpass->view_mask) { 3375 radv_emit_view_index(cmd_buffer, i); 3376 3377 radv_cs_emit_draw_indexed_packet(cmd_buffer, 3378 index_va, 3379 info->count); 3380 } 3381 } 3382 } else { 3383 if (!state->subpass->view_mask) { 3384 radv_cs_emit_draw_packet(cmd_buffer, info->count); 3385 } else { 3386 unsigned i; 3387 for_each_bit(i, state->subpass->view_mask) { 3388 radv_emit_view_index(cmd_buffer, i); 3389 3390 radv_cs_emit_draw_packet(cmd_buffer, 3391 info->count); 3392 } 3393 } 3394 } 3395 } 3396 } 3397 3398 static void 3399 radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, 3400 const struct radv_draw_info *info) 3401 { 3402 if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) 3403 radv_emit_graphics_pipeline(cmd_buffer); 3404 3405 if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) 3406 radv_emit_framebuffer_state(cmd_buffer); 3407 3408 if (info->indexed) { 3409 if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER) 3410 radv_emit_index_buffer(cmd_buffer); 3411 } else { 3412 /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, 3413 * so the state must be re-emitted before the next indexed 3414 * draw. 3415 */ 3416 if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { 3417 cmd_buffer->state.last_index_type = -1; 3418 cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; 3419 } 3420 } 3421 3422 radv_cmd_buffer_flush_dynamic_state(cmd_buffer); 3423 3424 radv_emit_draw_registers(cmd_buffer, info->indexed, 3425 info->instance_count > 1, info->indirect, 3426 info->indirect ? 0 : info->count); 3427 } 3428 3429 static void 3430 radv_draw(struct radv_cmd_buffer *cmd_buffer, 3431 const struct radv_draw_info *info) 3432 { 3433 bool pipeline_is_dirty = 3434 (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) && 3435 cmd_buffer->state.pipeline && 3436 cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline; 3437 3438 MAYBE_UNUSED unsigned cdw_max = 3439 radeon_check_space(cmd_buffer->device->ws, 3440 cmd_buffer->cs, 4096); 3441 3442 /* Use optimal packet order based on whether we need to sync the 3443 * pipeline. 3444 */ 3445 if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | 3446 RADV_CMD_FLAG_FLUSH_AND_INV_DB | 3447 RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 3448 RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { 3449 /* If we have to wait for idle, set all states first, so that 3450 * all SET packets are processed in parallel with previous draw 3451 * calls. Then upload descriptors, set shader pointers, and 3452 * draw, and prefetch at the end. This ensures that the time 3453 * the CUs are idle is very short. (there are only SET_SH 3454 * packets between the wait and the draw) 3455 */ 3456 radv_emit_all_graphics_states(cmd_buffer, info); 3457 si_emit_cache_flush(cmd_buffer); 3458 /* <-- CUs are idle here --> */ 3459 3460 if (!radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty)) 3461 return; 3462 3463 radv_emit_draw_packets(cmd_buffer, info); 3464 /* <-- CUs are busy here --> */ 3465 3466 /* Start prefetches after the draw has been started. 
Both will 3467 * run in parallel, but starting the draw first is more 3468 * important. 3469 */ 3470 if (pipeline_is_dirty) { 3471 radv_emit_prefetch(cmd_buffer, 3472 cmd_buffer->state.pipeline); 3473 } 3474 } else { 3475 /* If we don't wait for idle, start prefetches first, then set 3476 * states, and draw at the end. 3477 */ 3478 si_emit_cache_flush(cmd_buffer); 3479 3480 if (pipeline_is_dirty) { 3481 radv_emit_prefetch(cmd_buffer, 3482 cmd_buffer->state.pipeline); 3483 } 3484 3485 if (!radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty)) 3486 return; 3487 3488 radv_emit_all_graphics_states(cmd_buffer, info); 3489 radv_emit_draw_packets(cmd_buffer, info); 3490 } 3491 3492 assert(cmd_buffer->cs->cdw <= cdw_max); 3493 radv_cmd_buffer_after_draw(cmd_buffer); 3494 } 3495 3496 void radv_CmdDraw( 3497 VkCommandBuffer commandBuffer, 3498 uint32_t vertexCount, 3499 uint32_t instanceCount, 3500 uint32_t firstVertex, 3501 uint32_t firstInstance) 3502 { 3503 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3504 struct radv_draw_info info = {}; 3505 3506 info.count = vertexCount; 3507 info.instance_count = instanceCount; 3508 info.first_instance = firstInstance; 3509 info.vertex_offset = firstVertex; 3510 3511 radv_draw(cmd_buffer, &info); 3512 } 3513 3514 void radv_CmdDrawIndexed( 3515 VkCommandBuffer commandBuffer, 3516 uint32_t indexCount, 3517 uint32_t instanceCount, 3518 uint32_t firstIndex, 3519 int32_t vertexOffset, 3520 uint32_t firstInstance) 3521 { 3522 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3523 struct radv_draw_info info = {}; 3524 3525 info.indexed = true; 3526 info.count = indexCount; 3527 info.instance_count = instanceCount; 3528 info.first_index = firstIndex; 3529 info.vertex_offset = vertexOffset; 3530 info.first_instance = firstInstance; 3531 3532 radv_draw(cmd_buffer, &info); 3533 } 3534 3535 void radv_CmdDrawIndirect( 3536 VkCommandBuffer commandBuffer, 3537 VkBuffer _buffer, 3538 VkDeviceSize offset, 3539 uint32_t drawCount, 3540 uint32_t stride) 3541 { 3542 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3543 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 3544 struct radv_draw_info info = {}; 3545 3546 info.count = drawCount; 3547 info.indirect = buffer; 3548 info.indirect_offset = offset; 3549 info.stride = stride; 3550 3551 radv_draw(cmd_buffer, &info); 3552 } 3553 3554 void radv_CmdDrawIndexedIndirect( 3555 VkCommandBuffer commandBuffer, 3556 VkBuffer _buffer, 3557 VkDeviceSize offset, 3558 uint32_t drawCount, 3559 uint32_t stride) 3560 { 3561 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3562 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 3563 struct radv_draw_info info = {}; 3564 3565 info.indexed = true; 3566 info.count = drawCount; 3567 info.indirect = buffer; 3568 info.indirect_offset = offset; 3569 info.stride = stride; 3570 3571 radv_draw(cmd_buffer, &info); 3572 } 3573 3574 void radv_CmdDrawIndirectCountAMD( 3575 VkCommandBuffer commandBuffer, 3576 VkBuffer _buffer, 3577 VkDeviceSize offset, 3578 VkBuffer _countBuffer, 3579 VkDeviceSize countBufferOffset, 3580 uint32_t maxDrawCount, 3581 uint32_t stride) 3582 { 3583 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3584 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 3585 RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); 3586 struct radv_draw_info info = {}; 3587 3588 info.count = maxDrawCount; 3589 info.indirect = buffer; 3590 info.indirect_offset = offset; 3591 info.count_buffer = count_buffer; 3592 
info.count_buffer_offset = countBufferOffset; 3593 info.stride = stride; 3594 3595 radv_draw(cmd_buffer, &info); 3596 } 3597 3598 void radv_CmdDrawIndexedIndirectCountAMD( 3599 VkCommandBuffer commandBuffer, 3600 VkBuffer _buffer, 3601 VkDeviceSize offset, 3602 VkBuffer _countBuffer, 3603 VkDeviceSize countBufferOffset, 3604 uint32_t maxDrawCount, 3605 uint32_t stride) 3606 { 3607 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3608 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 3609 RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); 3610 struct radv_draw_info info = {}; 3611 3612 info.indexed = true; 3613 info.count = maxDrawCount; 3614 info.indirect = buffer; 3615 info.indirect_offset = offset; 3616 info.count_buffer = count_buffer; 3617 info.count_buffer_offset = countBufferOffset; 3618 info.stride = stride; 3619 3620 radv_draw(cmd_buffer, &info); 3621 } 3622 3623 struct radv_dispatch_info { 3624 /** 3625 * Determine the layout of the grid (in block units) to be used. 3626 */ 3627 uint32_t blocks[3]; 3628 3629 /** 3630 * Whether it's an unaligned compute dispatch. 3631 */ 3632 bool unaligned; 3633 3634 /** 3635 * Indirect compute parameters resource. 3636 */ 3637 struct radv_buffer *indirect; 3638 uint64_t indirect_offset; 3639 }; 3640 3641 static void 3642 radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, 3643 const struct radv_dispatch_info *info) 3644 { 3645 struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; 3646 struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; 3647 unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator; 3648 struct radeon_winsys *ws = cmd_buffer->device->ws; 3649 struct radeon_winsys_cs *cs = cmd_buffer->cs; 3650 struct ac_userdata_info *loc; 3651 3652 loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, 3653 AC_UD_CS_GRID_SIZE); 3654 3655 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(ws, cs, 25); 3656 3657 if (info->indirect) { 3658 uint64_t va = radv_buffer_get_va(info->indirect->bo); 3659 3660 va += info->indirect->offset + info->indirect_offset; 3661 3662 radv_cs_add_buffer(ws, cs, info->indirect->bo, 8); 3663 3664 if (loc->sgpr_idx != -1) { 3665 for (unsigned i = 0; i < 3; ++i) { 3666 radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 3667 radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | 3668 COPY_DATA_DST_SEL(COPY_DATA_REG)); 3669 radeon_emit(cs, (va + 4 * i)); 3670 radeon_emit(cs, (va + 4 * i) >> 32); 3671 radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 3672 + loc->sgpr_idx * 4) >> 2) + i); 3673 radeon_emit(cs, 0); 3674 } 3675 } 3676 3677 if (radv_cmd_buffer_uses_mec(cmd_buffer)) { 3678 radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | 3679 PKT3_SHADER_TYPE_S(1)); 3680 radeon_emit(cs, va); 3681 radeon_emit(cs, va >> 32); 3682 radeon_emit(cs, dispatch_initiator); 3683 } else { 3684 radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | 3685 PKT3_SHADER_TYPE_S(1)); 3686 radeon_emit(cs, 1); 3687 radeon_emit(cs, va); 3688 radeon_emit(cs, va >> 32); 3689 3690 radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) | 3691 PKT3_SHADER_TYPE_S(1)); 3692 radeon_emit(cs, 0); 3693 radeon_emit(cs, dispatch_initiator); 3694 } 3695 } else { 3696 unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] }; 3697 3698 if (info->unaligned) { 3699 unsigned *cs_block_size = compute_shader->info.cs.block_size; 3700 unsigned remainder[3]; 3701 3702 /* If aligned, these should be an entire block size, 3703 * not 0. 
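			 *
			 * For example, with blocks[0] = 10 threads and
			 * cs_block_size[0] = 8, align_u32_npot(10, 8) = 16,
			 * so remainder[0] = 10 + 8 - 16 = 2 threads in the
			 * final partial group, and blocks[0] is rounded up to
			 * 2 thread groups below.  With an aligned
			 * blocks[0] = 16, remainder[0] = 16 + 8 - 16 = 8,
			 * i.e. a full block.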
3704 */ 3705 remainder[0] = blocks[0] + cs_block_size[0] - 3706 align_u32_npot(blocks[0], cs_block_size[0]); 3707 remainder[1] = blocks[1] + cs_block_size[1] - 3708 align_u32_npot(blocks[1], cs_block_size[1]); 3709 remainder[2] = blocks[2] + cs_block_size[2] - 3710 align_u32_npot(blocks[2], cs_block_size[2]); 3711 3712 blocks[0] = round_up_u32(blocks[0], cs_block_size[0]); 3713 blocks[1] = round_up_u32(blocks[1], cs_block_size[1]); 3714 blocks[2] = round_up_u32(blocks[2], cs_block_size[2]); 3715 3716 radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); 3717 radeon_emit(cs, 3718 S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) | 3719 S_00B81C_NUM_THREAD_PARTIAL(remainder[0])); 3720 radeon_emit(cs, 3721 S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) | 3722 S_00B81C_NUM_THREAD_PARTIAL(remainder[1])); 3723 radeon_emit(cs, 3724 S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) | 3725 S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); 3726 3727 dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); 3728 } 3729 3730 if (loc->sgpr_idx != -1) { 3731 assert(!loc->indirect); 3732 assert(loc->num_sgprs == 3); 3733 3734 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + 3735 loc->sgpr_idx * 4, 3); 3736 radeon_emit(cs, blocks[0]); 3737 radeon_emit(cs, blocks[1]); 3738 radeon_emit(cs, blocks[2]); 3739 } 3740 3741 radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) | 3742 PKT3_SHADER_TYPE_S(1)); 3743 radeon_emit(cs, blocks[0]); 3744 radeon_emit(cs, blocks[1]); 3745 radeon_emit(cs, blocks[2]); 3746 radeon_emit(cs, dispatch_initiator); 3747 } 3748 3749 assert(cmd_buffer->cs->cdw <= cdw_max); 3750 } 3751 3752 static void 3753 radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer) 3754 { 3755 radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT); 3756 radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline, 3757 VK_SHADER_STAGE_COMPUTE_BIT); 3758 } 3759 3760 static void 3761 radv_dispatch(struct radv_cmd_buffer *cmd_buffer, 3762 const struct radv_dispatch_info *info) 3763 { 3764 struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; 3765 bool pipeline_is_dirty = pipeline && 3766 pipeline != cmd_buffer->state.emitted_compute_pipeline; 3767 3768 if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | 3769 RADV_CMD_FLAG_FLUSH_AND_INV_DB | 3770 RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 3771 RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { 3772 /* If we have to wait for idle, set all states first, so that 3773 * all SET packets are processed in parallel with previous draw 3774 * calls. Then upload descriptors, set shader pointers, and 3775 * dispatch, and prefetch at the end. This ensures that the 3776 * time the CUs are idle is very short. (there are only SET_SH 3777 * packets between the wait and the draw) 3778 */ 3779 radv_emit_compute_pipeline(cmd_buffer); 3780 si_emit_cache_flush(cmd_buffer); 3781 /* <-- CUs are idle here --> */ 3782 3783 radv_upload_compute_shader_descriptors(cmd_buffer); 3784 3785 radv_emit_dispatch_packets(cmd_buffer, info); 3786 /* <-- CUs are busy here --> */ 3787 3788 /* Start prefetches after the dispatch has been started. Both 3789 * will run in parallel, but starting the dispatch first is 3790 * more important. 3791 */ 3792 if (pipeline_is_dirty) { 3793 radv_emit_shader_prefetch(cmd_buffer, 3794 pipeline->shaders[MESA_SHADER_COMPUTE]); 3795 } 3796 } else { 3797 /* If we don't wait for idle, start prefetches first, then set 3798 * states, and dispatch at the end. 
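		 * Prefetching goes through CP DMA, so (presumably) it can
		 * start priming the caches with the shader binaries while
		 * earlier work is still running on the CUs.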
3799 */ 3800 si_emit_cache_flush(cmd_buffer); 3801 3802 if (pipeline_is_dirty) { 3803 radv_emit_shader_prefetch(cmd_buffer, 3804 pipeline->shaders[MESA_SHADER_COMPUTE]); 3805 } 3806 3807 radv_upload_compute_shader_descriptors(cmd_buffer); 3808 3809 radv_emit_compute_pipeline(cmd_buffer); 3810 radv_emit_dispatch_packets(cmd_buffer, info); 3811 } 3812 3813 radv_cmd_buffer_after_draw(cmd_buffer); 3814 } 3815 3816 void radv_CmdDispatch( 3817 VkCommandBuffer commandBuffer, 3818 uint32_t x, 3819 uint32_t y, 3820 uint32_t z) 3821 { 3822 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3823 struct radv_dispatch_info info = {}; 3824 3825 info.blocks[0] = x; 3826 info.blocks[1] = y; 3827 info.blocks[2] = z; 3828 3829 radv_dispatch(cmd_buffer, &info); 3830 } 3831 3832 void radv_CmdDispatchIndirect( 3833 VkCommandBuffer commandBuffer, 3834 VkBuffer _buffer, 3835 VkDeviceSize offset) 3836 { 3837 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3838 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 3839 struct radv_dispatch_info info = {}; 3840 3841 info.indirect = buffer; 3842 info.indirect_offset = offset; 3843 3844 radv_dispatch(cmd_buffer, &info); 3845 } 3846 3847 void radv_unaligned_dispatch( 3848 struct radv_cmd_buffer *cmd_buffer, 3849 uint32_t x, 3850 uint32_t y, 3851 uint32_t z) 3852 { 3853 struct radv_dispatch_info info = {}; 3854 3855 info.blocks[0] = x; 3856 info.blocks[1] = y; 3857 info.blocks[2] = z; 3858 info.unaligned = 1; 3859 3860 radv_dispatch(cmd_buffer, &info); 3861 } 3862 3863 void radv_CmdEndRenderPass( 3864 VkCommandBuffer commandBuffer) 3865 { 3866 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 3867 3868 radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier); 3869 3870 radv_cmd_buffer_resolve_subpass(cmd_buffer); 3871 3872 for (unsigned i = 0; i < cmd_buffer->state.framebuffer->attachment_count; ++i) { 3873 VkImageLayout layout = cmd_buffer->state.pass->attachments[i].final_layout; 3874 radv_handle_subpass_image_transition(cmd_buffer, 3875 (VkAttachmentReference){i, layout}); 3876 } 3877 3878 vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); 3879 3880 cmd_buffer->state.pass = NULL; 3881 cmd_buffer->state.subpass = NULL; 3882 cmd_buffer->state.attachments = NULL; 3883 cmd_buffer->state.framebuffer = NULL; 3884 } 3885 3886 /* 3887 * For HTILE we have the following interesting clear words: 3888 * 0xfffff30f: Uncompressed, full depth range, for depth+stencil HTILE 3889 * 0xfffc000f: Uncompressed, full depth range, for depth only HTILE. 
3890 * 0xfffffff0: Clear depth to 1.0 3891 * 0x00000000: Clear depth to 0.0 3892 */ 3893 static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, 3894 struct radv_image *image, 3895 const VkImageSubresourceRange *range, 3896 uint32_t clear_word) 3897 { 3898 assert(range->baseMipLevel == 0); 3899 assert(range->levelCount == 1 || range->levelCount == VK_REMAINING_ARRAY_LAYERS); 3900 unsigned layer_count = radv_get_layerCount(image, range); 3901 uint64_t size = image->surface.htile_slice_size * layer_count; 3902 uint64_t offset = image->offset + image->htile_offset + 3903 image->surface.htile_slice_size * range->baseArrayLayer; 3904 struct radv_cmd_state *state = &cmd_buffer->state; 3905 3906 state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | 3907 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 3908 3909 state->flush_bits |= radv_fill_buffer(cmd_buffer, image->bo, offset, 3910 size, clear_word); 3911 3912 state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 3913 } 3914 3915 static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, 3916 struct radv_image *image, 3917 VkImageLayout src_layout, 3918 VkImageLayout dst_layout, 3919 unsigned src_queue_mask, 3920 unsigned dst_queue_mask, 3921 const VkImageSubresourceRange *range, 3922 VkImageAspectFlags pending_clears) 3923 { 3924 if (dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && 3925 (pending_clears & vk_format_aspects(image->vk_format)) == vk_format_aspects(image->vk_format) && 3926 cmd_buffer->state.render_area.offset.x == 0 && cmd_buffer->state.render_area.offset.y == 0 && 3927 cmd_buffer->state.render_area.extent.width == image->info.width && 3928 cmd_buffer->state.render_area.extent.height == image->info.height) { 3929 /* The clear will initialize htile. */ 3930 return; 3931 } else if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED && 3932 radv_layout_has_htile(image, dst_layout, dst_queue_mask)) { 3933 /* TODO: merge with the clear if applicable */ 3934 radv_initialize_htile(cmd_buffer, image, range, 0); 3935 } else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) && 3936 radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) { 3937 uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 
0xfffff30f : 0xfffc000f; 3938 radv_initialize_htile(cmd_buffer, image, range, clear_value); 3939 } else if (radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) && 3940 !radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) { 3941 VkImageSubresourceRange local_range = *range; 3942 local_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; 3943 local_range.baseMipLevel = 0; 3944 local_range.levelCount = 1; 3945 3946 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | 3947 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 3948 3949 radv_decompress_depth_image_inplace(cmd_buffer, image, &local_range); 3950 3951 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | 3952 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 3953 } 3954 } 3955 3956 void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer, 3957 struct radv_image *image, uint32_t value) 3958 { 3959 struct radv_cmd_state *state = &cmd_buffer->state; 3960 3961 state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 3962 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 3963 3964 state->flush_bits |= radv_fill_buffer(cmd_buffer, image->bo, 3965 image->offset + image->cmask.offset, 3966 image->cmask.size, value); 3967 3968 state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 3969 } 3970 3971 static void radv_handle_cmask_image_transition(struct radv_cmd_buffer *cmd_buffer, 3972 struct radv_image *image, 3973 VkImageLayout src_layout, 3974 VkImageLayout dst_layout, 3975 unsigned src_queue_mask, 3976 unsigned dst_queue_mask, 3977 const VkImageSubresourceRange *range) 3978 { 3979 if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { 3980 if (image->fmask.size) 3981 radv_initialise_cmask(cmd_buffer, image, 0xccccccccu); 3982 else 3983 radv_initialise_cmask(cmd_buffer, image, 0xffffffffu); 3984 } else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) && 3985 !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { 3986 radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); 3987 } 3988 } 3989 3990 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, 3991 struct radv_image *image, uint32_t value) 3992 { 3993 struct radv_cmd_state *state = &cmd_buffer->state; 3994 3995 state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 3996 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 3997 3998 state->flush_bits |= radv_fill_buffer(cmd_buffer, image->bo, 3999 image->offset + image->dcc_offset, 4000 image->surface.dcc_size, value); 4001 4002 state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | 4003 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4004 } 4005 4006 static void radv_handle_dcc_image_transition(struct radv_cmd_buffer *cmd_buffer, 4007 struct radv_image *image, 4008 VkImageLayout src_layout, 4009 VkImageLayout dst_layout, 4010 unsigned src_queue_mask, 4011 unsigned dst_queue_mask, 4012 const VkImageSubresourceRange *range) 4013 { 4014 if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) { 4015 radv_initialize_dcc(cmd_buffer, image, 0xffffffffu); 4016 } else if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { 4017 radv_initialize_dcc(cmd_buffer, image, 4018 radv_layout_dcc_compressed(image, dst_layout, dst_queue_mask) ? 
4019 0x20202020u : 0xffffffffu); 4020 } else if (radv_layout_dcc_compressed(image, src_layout, src_queue_mask) && 4021 !radv_layout_dcc_compressed(image, dst_layout, dst_queue_mask)) { 4022 radv_decompress_dcc(cmd_buffer, image, range); 4023 } else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) && 4024 !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { 4025 radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); 4026 } 4027 } 4028 4029 static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, 4030 struct radv_image *image, 4031 VkImageLayout src_layout, 4032 VkImageLayout dst_layout, 4033 uint32_t src_family, 4034 uint32_t dst_family, 4035 const VkImageSubresourceRange *range, 4036 VkImageAspectFlags pending_clears) 4037 { 4038 if (image->exclusive && src_family != dst_family) { 4039 /* This is an acquire or a release operation and there will be 4040 * a corresponding release/acquire. Do the transition in the 4041 * most flexible queue. */ 4042 4043 assert(src_family == cmd_buffer->queue_family_index || 4044 dst_family == cmd_buffer->queue_family_index); 4045 4046 if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER) 4047 return; 4048 4049 if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && 4050 (src_family == RADV_QUEUE_GENERAL || 4051 dst_family == RADV_QUEUE_GENERAL)) 4052 return; 4053 } 4054 4055 unsigned src_queue_mask = radv_image_queue_family_mask(image, src_family, cmd_buffer->queue_family_index); 4056 unsigned dst_queue_mask = radv_image_queue_family_mask(image, dst_family, cmd_buffer->queue_family_index); 4057 4058 if (image->surface.htile_size) 4059 radv_handle_depth_image_transition(cmd_buffer, image, src_layout, 4060 dst_layout, src_queue_mask, 4061 dst_queue_mask, range, 4062 pending_clears); 4063 4064 if (image->cmask.size || image->fmask.size) 4065 radv_handle_cmask_image_transition(cmd_buffer, image, src_layout, 4066 dst_layout, src_queue_mask, 4067 dst_queue_mask, range); 4068 4069 if (image->surface.dcc_size) 4070 radv_handle_dcc_image_transition(cmd_buffer, image, src_layout, 4071 dst_layout, src_queue_mask, 4072 dst_queue_mask, range); 4073 } 4074 4075 void radv_CmdPipelineBarrier( 4076 VkCommandBuffer commandBuffer, 4077 VkPipelineStageFlags srcStageMask, 4078 VkPipelineStageFlags destStageMask, 4079 VkBool32 byRegion, 4080 uint32_t memoryBarrierCount, 4081 const VkMemoryBarrier* pMemoryBarriers, 4082 uint32_t bufferMemoryBarrierCount, 4083 const VkBufferMemoryBarrier* pBufferMemoryBarriers, 4084 uint32_t imageMemoryBarrierCount, 4085 const VkImageMemoryBarrier* pImageMemoryBarriers) 4086 { 4087 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4088 enum radv_cmd_flush_bits src_flush_bits = 0; 4089 enum radv_cmd_flush_bits dst_flush_bits = 0; 4090 4091 for (uint32_t i = 0; i < memoryBarrierCount; i++) { 4092 src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask); 4093 dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask, 4094 NULL); 4095 } 4096 4097 for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { 4098 src_flush_bits |= radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask); 4099 dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask, 4100 NULL); 4101 } 4102 4103 for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { 4104 RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); 4105 src_flush_bits |= radv_src_access_flush(cmd_buffer, 
pImageMemoryBarriers[i].srcAccessMask); 4106 dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask, 4107 image); 4108 } 4109 4110 radv_stage_flush(cmd_buffer, srcStageMask); 4111 cmd_buffer->state.flush_bits |= src_flush_bits; 4112 4113 for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { 4114 RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); 4115 radv_handle_image_transition(cmd_buffer, image, 4116 pImageMemoryBarriers[i].oldLayout, 4117 pImageMemoryBarriers[i].newLayout, 4118 pImageMemoryBarriers[i].srcQueueFamilyIndex, 4119 pImageMemoryBarriers[i].dstQueueFamilyIndex, 4120 &pImageMemoryBarriers[i].subresourceRange, 4121 0); 4122 } 4123 4124 cmd_buffer->state.flush_bits |= dst_flush_bits; 4125 } 4126 4127 4128 static void write_event(struct radv_cmd_buffer *cmd_buffer, 4129 struct radv_event *event, 4130 VkPipelineStageFlags stageMask, 4131 unsigned value) 4132 { 4133 struct radeon_winsys_cs *cs = cmd_buffer->cs; 4134 uint64_t va = radv_buffer_get_va(event->bo); 4135 4136 radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo, 8); 4137 4138 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18); 4139 4140 /* TODO: this is overkill. Probably should figure something out from 4141 * the stage mask. */ 4142 4143 si_cs_emit_write_event_eop(cs, 4144 cmd_buffer->state.predicating, 4145 cmd_buffer->device->physical_device->rad_info.chip_class, 4146 radv_cmd_buffer_uses_mec(cmd_buffer), 4147 V_028A90_BOTTOM_OF_PIPE_TS, 0, 4148 1, va, 2, value); 4149 4150 assert(cmd_buffer->cs->cdw <= cdw_max); 4151 } 4152 4153 void radv_CmdSetEvent(VkCommandBuffer commandBuffer, 4154 VkEvent _event, 4155 VkPipelineStageFlags stageMask) 4156 { 4157 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4158 RADV_FROM_HANDLE(radv_event, event, _event); 4159 4160 write_event(cmd_buffer, event, stageMask, 1); 4161 } 4162 4163 void radv_CmdResetEvent(VkCommandBuffer commandBuffer, 4164 VkEvent _event, 4165 VkPipelineStageFlags stageMask) 4166 { 4167 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4168 RADV_FROM_HANDLE(radv_event, event, _event); 4169 4170 write_event(cmd_buffer, event, stageMask, 0); 4171 } 4172 4173 void radv_CmdWaitEvents(VkCommandBuffer commandBuffer, 4174 uint32_t eventCount, 4175 const VkEvent* pEvents, 4176 VkPipelineStageFlags srcStageMask, 4177 VkPipelineStageFlags dstStageMask, 4178 uint32_t memoryBarrierCount, 4179 const VkMemoryBarrier* pMemoryBarriers, 4180 uint32_t bufferMemoryBarrierCount, 4181 const VkBufferMemoryBarrier* pBufferMemoryBarriers, 4182 uint32_t imageMemoryBarrierCount, 4183 const VkImageMemoryBarrier* pImageMemoryBarriers) 4184 { 4185 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4186 struct radeon_winsys_cs *cs = cmd_buffer->cs; 4187 4188 for (unsigned i = 0; i < eventCount; ++i) { 4189 RADV_FROM_HANDLE(radv_event, event, pEvents[i]); 4190 uint64_t va = radv_buffer_get_va(event->bo); 4191 4192 radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo, 8); 4193 4194 MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7); 4195 4196 si_emit_wait_fence(cs, false, va, 1, 0xffffffff); 4197 assert(cmd_buffer->cs->cdw <= cdw_max); 4198 } 4199 4200 4201 for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { 4202 RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); 4203 4204 radv_handle_image_transition(cmd_buffer, image, 4205 pImageMemoryBarriers[i].oldLayout, 4206 pImageMemoryBarriers[i].newLayout, 4207 
pImageMemoryBarriers[i].srcQueueFamilyIndex, 4208 pImageMemoryBarriers[i].dstQueueFamilyIndex, 4209 &pImageMemoryBarriers[i].subresourceRange, 4210 0); 4211 } 4212 4213 /* TODO: figure out how to do memory barriers without waiting */ 4214 cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | 4215 RADV_CMD_FLAG_INV_GLOBAL_L2 | 4216 RADV_CMD_FLAG_INV_VMEM_L1 | 4217 RADV_CMD_FLAG_INV_SMEM_L1; 4218 } 4219
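
/*
 * Illustrative sketch (from the application's point of view) of the event
 * path implemented above; the calls below are standard Vulkan entry points
 * and are not part of this file:
 *
 *    vkCmdDispatch(cmd, ...);              // producer work
 *    vkCmdSetEvent(cmd, event, stageMask); // write_event() stores 1 into the
 *                                          // event BO via a bottom-of-pipe
 *                                          // EOP event
 *    ...
 *    vkCmdWaitEvents(cmd, 1, &event, ...); // waits until the event dword
 *                                          // reads 1, applies the image
 *                                          // layout transitions, then
 *                                          // flushes caches conservatively
 *
 * The wait itself is si_emit_wait_fence() on the event BO's virtual address;
 * the memory barriers are currently handled by the conservative flush at the
 * end of radv_CmdWaitEvents() (see the TODO above).
 */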