1 /* 2 * Copyright 2010 Jerome Glisse <glisse (at) freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "si_pipe.h" 25 #include "si_compute.h" 26 #include "util/u_format.h" 27 #include "util/u_log.h" 28 #include "util/u_surface.h" 29 30 enum { 31 SI_COPY = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | 32 SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND, 33 34 SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | 35 SI_SAVE_FRAGMENT_STATE, 36 37 SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE | 38 SI_DISABLE_RENDER_COND, 39 40 SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE 41 }; 42 43 void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) 44 { 45 struct si_context *sctx = (struct si_context *)ctx; 46 47 util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso); 48 util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso); 49 util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso); 50 util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso); 51 util_blitter_save_so_targets(sctx->blitter, sctx->streamout.num_targets, 52 (struct pipe_stream_output_target**)sctx->streamout.targets); 53 util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer); 54 55 if (op & SI_SAVE_FRAGMENT_STATE) { 56 util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend); 57 util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa); 58 util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state); 59 util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso); 60 util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask); 61 util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]); 62 } 63 64 if (op & SI_SAVE_FRAMEBUFFER) 65 util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state); 66 67 if (op & SI_SAVE_TEXTURES) { 68 util_blitter_save_fragment_sampler_states( 69 sctx->blitter, 2, 70 (void**)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states); 71 72 util_blitter_save_fragment_sampler_views(sctx->blitter, 2, 73 sctx->samplers[PIPE_SHADER_FRAGMENT].views); 74 } 75 76 if (op & SI_DISABLE_RENDER_COND) 77 sctx->b.render_cond_force_off = true; 78 } 79 80 void si_blitter_end(struct pipe_context *ctx) 81 { 82 struct si_context *sctx = (struct si_context *)ctx; 83 84 sctx->b.render_cond_force_off = false; 85 86 /* Restore shader pointers because the VS blit shader changed all 87 * non-global VS user SGPRs. */ 88 sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX); 89 sctx->vertex_buffer_pointer_dirty = true; 90 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); 91 } 92 93 static unsigned u_max_sample(struct pipe_resource *r) 94 { 95 return r->nr_samples ? r->nr_samples - 1 : 0; 96 } 97 98 static unsigned 99 si_blit_dbcb_copy(struct si_context *sctx, 100 struct r600_texture *src, 101 struct r600_texture *dst, 102 unsigned planes, unsigned level_mask, 103 unsigned first_layer, unsigned last_layer, 104 unsigned first_sample, unsigned last_sample) 105 { 106 struct pipe_surface surf_tmpl = {{0}}; 107 unsigned layer, sample, checked_last_layer, max_layer; 108 unsigned fully_copied_levels = 0; 109 110 if (planes & PIPE_MASK_Z) 111 sctx->dbcb_depth_copy_enabled = true; 112 if (planes & PIPE_MASK_S) 113 sctx->dbcb_stencil_copy_enabled = true; 114 si_mark_atom_dirty(sctx, &sctx->db_render_state); 115 116 assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled); 117 118 sctx->decompression_enabled = true; 119 120 while (level_mask) { 121 unsigned level = u_bit_scan(&level_mask); 122 123 /* The smaller the mipmap level, the less layers there are 124 * as far as 3D textures are concerned. */ 125 max_layer = util_max_layer(&src->resource.b.b, level); 126 checked_last_layer = MIN2(last_layer, max_layer); 127 128 surf_tmpl.u.tex.level = level; 129 130 for (layer = first_layer; layer <= checked_last_layer; layer++) { 131 struct pipe_surface *zsurf, *cbsurf; 132 133 surf_tmpl.format = src->resource.b.b.format; 134 surf_tmpl.u.tex.first_layer = layer; 135 surf_tmpl.u.tex.last_layer = layer; 136 137 zsurf = sctx->b.b.create_surface(&sctx->b.b, &src->resource.b.b, &surf_tmpl); 138 139 surf_tmpl.format = dst->resource.b.b.format; 140 cbsurf = sctx->b.b.create_surface(&sctx->b.b, &dst->resource.b.b, &surf_tmpl); 141 142 for (sample = first_sample; sample <= last_sample; sample++) { 143 if (sample != sctx->dbcb_copy_sample) { 144 sctx->dbcb_copy_sample = sample; 145 si_mark_atom_dirty(sctx, &sctx->db_render_state); 146 } 147 148 si_blitter_begin(&sctx->b.b, SI_DECOMPRESS); 149 util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample, 150 sctx->custom_dsa_flush, 1.0f); 151 si_blitter_end(&sctx->b.b); 152 } 153 154 pipe_surface_reference(&zsurf, NULL); 155 pipe_surface_reference(&cbsurf, NULL); 156 } 157 158 if (first_layer == 0 && last_layer >= max_layer && 159 first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b)) 160 fully_copied_levels |= 1u << level; 161 } 162 163 sctx->decompression_enabled = false; 164 sctx->dbcb_depth_copy_enabled = false; 165 sctx->dbcb_stencil_copy_enabled = false; 166 si_mark_atom_dirty(sctx, &sctx->db_render_state); 167 168 return fully_copied_levels; 169 } 170 171 static void si_blit_decompress_depth(struct pipe_context *ctx, 172 struct r600_texture *texture, 173 struct r600_texture *staging, 174 unsigned first_level, unsigned last_level, 175 unsigned first_layer, unsigned last_layer, 176 unsigned first_sample, unsigned last_sample) 177 { 178 const struct util_format_description *desc; 179 unsigned planes = 0; 180 181 assert(staging != NULL && "use si_blit_decompress_zs_in_place instead"); 182 183 desc = util_format_description(staging->resource.b.b.format); 184 185 if (util_format_has_depth(desc)) 186 planes |= PIPE_MASK_Z; 187 if (util_format_has_stencil(desc)) 188 planes |= PIPE_MASK_S; 189 190 si_blit_dbcb_copy( 191 (struct si_context *)ctx, texture, staging, planes, 192 u_bit_consecutive(first_level, last_level - first_level + 1), 193 first_layer, last_layer, first_sample, last_sample); 194 } 195 196 /* Helper function for si_blit_decompress_zs_in_place. 197 */ 198 static void 199 si_blit_decompress_zs_planes_in_place(struct si_context *sctx, 200 struct r600_texture *texture, 201 unsigned planes, unsigned level_mask, 202 unsigned first_layer, unsigned last_layer) 203 { 204 struct pipe_surface *zsurf, surf_tmpl = {{0}}; 205 unsigned layer, max_layer, checked_last_layer; 206 unsigned fully_decompressed_mask = 0; 207 208 if (!level_mask) 209 return; 210 211 if (planes & PIPE_MASK_S) 212 sctx->db_flush_stencil_inplace = true; 213 if (planes & PIPE_MASK_Z) 214 sctx->db_flush_depth_inplace = true; 215 si_mark_atom_dirty(sctx, &sctx->db_render_state); 216 217 surf_tmpl.format = texture->resource.b.b.format; 218 219 sctx->decompression_enabled = true; 220 221 while (level_mask) { 222 unsigned level = u_bit_scan(&level_mask); 223 224 surf_tmpl.u.tex.level = level; 225 226 /* The smaller the mipmap level, the less layers there are 227 * as far as 3D textures are concerned. */ 228 max_layer = util_max_layer(&texture->resource.b.b, level); 229 checked_last_layer = MIN2(last_layer, max_layer); 230 231 for (layer = first_layer; layer <= checked_last_layer; layer++) { 232 surf_tmpl.u.tex.first_layer = layer; 233 surf_tmpl.u.tex.last_layer = layer; 234 235 zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b, &surf_tmpl); 236 237 si_blitter_begin(&sctx->b.b, SI_DECOMPRESS); 238 util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0, 239 sctx->custom_dsa_flush, 240 1.0f); 241 si_blitter_end(&sctx->b.b); 242 243 pipe_surface_reference(&zsurf, NULL); 244 } 245 246 /* The texture will always be dirty if some layers aren't flushed. 247 * I don't think this case occurs often though. */ 248 if (first_layer == 0 && last_layer >= max_layer) { 249 fully_decompressed_mask |= 1u << level; 250 } 251 } 252 253 if (planes & PIPE_MASK_Z) 254 texture->dirty_level_mask &= ~fully_decompressed_mask; 255 if (planes & PIPE_MASK_S) 256 texture->stencil_dirty_level_mask &= ~fully_decompressed_mask; 257 258 sctx->decompression_enabled = false; 259 sctx->db_flush_depth_inplace = false; 260 sctx->db_flush_stencil_inplace = false; 261 si_mark_atom_dirty(sctx, &sctx->db_render_state); 262 } 263 264 /* Helper function of si_flush_depth_texture: decompress the given levels 265 * of Z and/or S planes in place. 266 */ 267 static void 268 si_blit_decompress_zs_in_place(struct si_context *sctx, 269 struct r600_texture *texture, 270 unsigned levels_z, unsigned levels_s, 271 unsigned first_layer, unsigned last_layer) 272 { 273 unsigned both = levels_z & levels_s; 274 275 /* First, do combined Z & S decompresses for levels that need it. */ 276 if (both) { 277 si_blit_decompress_zs_planes_in_place( 278 sctx, texture, PIPE_MASK_Z | PIPE_MASK_S, 279 both, 280 first_layer, last_layer); 281 levels_z &= ~both; 282 levels_s &= ~both; 283 } 284 285 /* Now do separate Z and S decompresses. */ 286 if (levels_z) { 287 si_blit_decompress_zs_planes_in_place( 288 sctx, texture, PIPE_MASK_Z, 289 levels_z, 290 first_layer, last_layer); 291 } 292 293 if (levels_s) { 294 si_blit_decompress_zs_planes_in_place( 295 sctx, texture, PIPE_MASK_S, 296 levels_s, 297 first_layer, last_layer); 298 } 299 } 300 301 static void 302 si_decompress_depth(struct si_context *sctx, 303 struct r600_texture *tex, 304 unsigned required_planes, 305 unsigned first_level, unsigned last_level, 306 unsigned first_layer, unsigned last_layer) 307 { 308 unsigned inplace_planes = 0; 309 unsigned copy_planes = 0; 310 unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1); 311 unsigned levels_z = 0; 312 unsigned levels_s = 0; 313 314 if (required_planes & PIPE_MASK_Z) { 315 levels_z = level_mask & tex->dirty_level_mask; 316 317 if (levels_z) { 318 if (si_can_sample_zs(tex, false)) 319 inplace_planes |= PIPE_MASK_Z; 320 else 321 copy_planes |= PIPE_MASK_Z; 322 } 323 } 324 if (required_planes & PIPE_MASK_S) { 325 levels_s = level_mask & tex->stencil_dirty_level_mask; 326 327 if (levels_s) { 328 if (si_can_sample_zs(tex, true)) 329 inplace_planes |= PIPE_MASK_S; 330 else 331 copy_planes |= PIPE_MASK_S; 332 } 333 } 334 335 if (unlikely(sctx->b.log)) 336 u_log_printf(sctx->b.log, 337 "\n------------------------------------------------\n" 338 "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n", 339 first_level, last_level, levels_z, levels_s); 340 341 /* We may have to allocate the flushed texture here when called from 342 * si_decompress_subresource. 343 */ 344 if (copy_planes && 345 (tex->flushed_depth_texture || 346 si_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) { 347 struct r600_texture *dst = tex->flushed_depth_texture; 348 unsigned fully_copied_levels; 349 unsigned levels = 0; 350 351 assert(tex->flushed_depth_texture); 352 353 if (util_format_is_depth_and_stencil(dst->resource.b.b.format)) 354 copy_planes = PIPE_MASK_Z | PIPE_MASK_S; 355 356 if (copy_planes & PIPE_MASK_Z) { 357 levels |= levels_z; 358 levels_z = 0; 359 } 360 if (copy_planes & PIPE_MASK_S) { 361 levels |= levels_s; 362 levels_s = 0; 363 } 364 365 fully_copied_levels = si_blit_dbcb_copy( 366 sctx, tex, dst, copy_planes, levels, 367 first_layer, last_layer, 368 0, u_max_sample(&tex->resource.b.b)); 369 370 if (copy_planes & PIPE_MASK_Z) 371 tex->dirty_level_mask &= ~fully_copied_levels; 372 if (copy_planes & PIPE_MASK_S) 373 tex->stencil_dirty_level_mask &= ~fully_copied_levels; 374 } 375 376 if (inplace_planes) { 377 bool has_htile = si_htile_enabled(tex, first_level); 378 bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level); 379 380 /* Don't decompress if there is no HTILE or when HTILE is 381 * TC-compatible. */ 382 if (has_htile && !tc_compat_htile) { 383 si_blit_decompress_zs_in_place( 384 sctx, tex, 385 levels_z, levels_s, 386 first_layer, last_layer); 387 } else { 388 /* This is only a cache flush. 389 * 390 * Only clear the mask that we are flushing, because 391 * si_make_DB_shader_coherent() treats different levels 392 * and depth and stencil differently. 393 */ 394 if (inplace_planes & PIPE_MASK_Z) 395 tex->dirty_level_mask &= ~levels_z; 396 if (inplace_planes & PIPE_MASK_S) 397 tex->stencil_dirty_level_mask &= ~levels_s; 398 } 399 400 /* Only in-place decompression needs to flush DB caches, or 401 * when we don't decompress but TC-compatible planes are dirty. 402 */ 403 si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples, 404 inplace_planes & PIPE_MASK_S, 405 tc_compat_htile); 406 } 407 /* set_framebuffer_state takes care of coherency for single-sample. 408 * The DB->CB copy uses CB for the final writes. 409 */ 410 if (copy_planes && tex->resource.b.b.nr_samples > 1) 411 si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples, 412 false); 413 } 414 415 static void 416 si_decompress_sampler_depth_textures(struct si_context *sctx, 417 struct si_samplers *textures) 418 { 419 unsigned i; 420 unsigned mask = textures->needs_depth_decompress_mask; 421 422 while (mask) { 423 struct pipe_sampler_view *view; 424 struct si_sampler_view *sview; 425 struct r600_texture *tex; 426 427 i = u_bit_scan(&mask); 428 429 view = textures->views[i]; 430 assert(view); 431 sview = (struct si_sampler_view*)view; 432 433 tex = (struct r600_texture *)view->texture; 434 assert(tex->db_compatible); 435 436 si_decompress_depth(sctx, tex, 437 sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, 438 view->u.tex.first_level, view->u.tex.last_level, 439 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); 440 } 441 } 442 443 static void si_blit_decompress_color(struct pipe_context *ctx, 444 struct r600_texture *rtex, 445 unsigned first_level, unsigned last_level, 446 unsigned first_layer, unsigned last_layer, 447 bool need_dcc_decompress) 448 { 449 struct si_context *sctx = (struct si_context *)ctx; 450 void* custom_blend; 451 unsigned layer, checked_last_layer, max_layer; 452 unsigned level_mask = 453 u_bit_consecutive(first_level, last_level - first_level + 1); 454 455 if (!need_dcc_decompress) 456 level_mask &= rtex->dirty_level_mask; 457 if (!level_mask) 458 return; 459 460 if (unlikely(sctx->b.log)) 461 u_log_printf(sctx->b.log, 462 "\n------------------------------------------------\n" 463 "Decompress Color (levels %u - %u, mask 0x%x)\n\n", 464 first_level, last_level, level_mask); 465 466 if (need_dcc_decompress) { 467 custom_blend = sctx->custom_blend_dcc_decompress; 468 469 assert(rtex->dcc_offset); 470 471 /* disable levels without DCC */ 472 for (int i = first_level; i <= last_level; i++) { 473 if (!vi_dcc_enabled(rtex, i)) 474 level_mask &= ~(1 << i); 475 } 476 } else if (rtex->fmask.size) { 477 custom_blend = sctx->custom_blend_fmask_decompress; 478 } else { 479 custom_blend = sctx->custom_blend_eliminate_fastclear; 480 } 481 482 sctx->decompression_enabled = true; 483 484 while (level_mask) { 485 unsigned level = u_bit_scan(&level_mask); 486 487 /* The smaller the mipmap level, the less layers there are 488 * as far as 3D textures are concerned. */ 489 max_layer = util_max_layer(&rtex->resource.b.b, level); 490 checked_last_layer = MIN2(last_layer, max_layer); 491 492 for (layer = first_layer; layer <= checked_last_layer; layer++) { 493 struct pipe_surface *cbsurf, surf_tmpl; 494 495 surf_tmpl.format = rtex->resource.b.b.format; 496 surf_tmpl.u.tex.level = level; 497 surf_tmpl.u.tex.first_layer = layer; 498 surf_tmpl.u.tex.last_layer = layer; 499 cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); 500 501 /* Required before and after FMASK and DCC_DECOMPRESS. */ 502 if (custom_blend == sctx->custom_blend_fmask_decompress || 503 custom_blend == sctx->custom_blend_dcc_decompress) 504 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 505 506 si_blitter_begin(ctx, SI_DECOMPRESS); 507 util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend); 508 si_blitter_end(ctx); 509 510 if (custom_blend == sctx->custom_blend_fmask_decompress || 511 custom_blend == sctx->custom_blend_dcc_decompress) 512 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 513 514 pipe_surface_reference(&cbsurf, NULL); 515 } 516 517 /* The texture will always be dirty if some layers aren't flushed. 518 * I don't think this case occurs often though. */ 519 if (first_layer == 0 && last_layer >= max_layer) { 520 rtex->dirty_level_mask &= ~(1 << level); 521 } 522 } 523 524 sctx->decompression_enabled = false; 525 si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples, 526 vi_dcc_enabled(rtex, first_level)); 527 } 528 529 static void 530 si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex, 531 unsigned first_level, unsigned last_level) 532 { 533 /* CMASK or DCC can be discarded and we can still end up here. */ 534 if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset) 535 return; 536 537 si_blit_decompress_color(&sctx->b.b, tex, first_level, last_level, 0, 538 util_max_layer(&tex->resource.b.b, first_level), 539 false); 540 } 541 542 static void 543 si_decompress_sampler_color_textures(struct si_context *sctx, 544 struct si_samplers *textures) 545 { 546 unsigned i; 547 unsigned mask = textures->needs_color_decompress_mask; 548 549 while (mask) { 550 struct pipe_sampler_view *view; 551 struct r600_texture *tex; 552 553 i = u_bit_scan(&mask); 554 555 view = textures->views[i]; 556 assert(view); 557 558 tex = (struct r600_texture *)view->texture; 559 560 si_decompress_color_texture(sctx, tex, view->u.tex.first_level, 561 view->u.tex.last_level); 562 } 563 } 564 565 static void 566 si_decompress_image_color_textures(struct si_context *sctx, 567 struct si_images *images) 568 { 569 unsigned i; 570 unsigned mask = images->needs_color_decompress_mask; 571 572 while (mask) { 573 const struct pipe_image_view *view; 574 struct r600_texture *tex; 575 576 i = u_bit_scan(&mask); 577 578 view = &images->views[i]; 579 assert(view->resource->target != PIPE_BUFFER); 580 581 tex = (struct r600_texture *)view->resource; 582 583 si_decompress_color_texture(sctx, tex, view->u.tex.level, 584 view->u.tex.level); 585 } 586 } 587 588 static void si_check_render_feedback_texture(struct si_context *sctx, 589 struct r600_texture *tex, 590 unsigned first_level, 591 unsigned last_level, 592 unsigned first_layer, 593 unsigned last_layer) 594 { 595 bool render_feedback = false; 596 597 if (!tex->dcc_offset) 598 return; 599 600 for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) { 601 struct r600_surface * surf; 602 603 if (!sctx->framebuffer.state.cbufs[j]) 604 continue; 605 606 surf = (struct r600_surface*)sctx->framebuffer.state.cbufs[j]; 607 608 if (tex == (struct r600_texture *)surf->base.texture && 609 surf->base.u.tex.level >= first_level && 610 surf->base.u.tex.level <= last_level && 611 surf->base.u.tex.first_layer <= last_layer && 612 surf->base.u.tex.last_layer >= first_layer) { 613 render_feedback = true; 614 break; 615 } 616 } 617 618 if (render_feedback) 619 si_texture_disable_dcc(&sctx->b, tex); 620 } 621 622 static void si_check_render_feedback_textures(struct si_context *sctx, 623 struct si_samplers *textures) 624 { 625 uint32_t mask = textures->enabled_mask; 626 627 while (mask) { 628 const struct pipe_sampler_view *view; 629 struct r600_texture *tex; 630 631 unsigned i = u_bit_scan(&mask); 632 633 view = textures->views[i]; 634 if(view->texture->target == PIPE_BUFFER) 635 continue; 636 637 tex = (struct r600_texture *)view->texture; 638 639 si_check_render_feedback_texture(sctx, tex, 640 view->u.tex.first_level, 641 view->u.tex.last_level, 642 view->u.tex.first_layer, 643 view->u.tex.last_layer); 644 } 645 } 646 647 static void si_check_render_feedback_images(struct si_context *sctx, 648 struct si_images *images) 649 { 650 uint32_t mask = images->enabled_mask; 651 652 while (mask) { 653 const struct pipe_image_view *view; 654 struct r600_texture *tex; 655 656 unsigned i = u_bit_scan(&mask); 657 658 view = &images->views[i]; 659 if (view->resource->target == PIPE_BUFFER) 660 continue; 661 662 tex = (struct r600_texture *)view->resource; 663 664 si_check_render_feedback_texture(sctx, tex, 665 view->u.tex.level, 666 view->u.tex.level, 667 view->u.tex.first_layer, 668 view->u.tex.last_layer); 669 } 670 } 671 672 static void si_check_render_feedback_resident_textures(struct si_context *sctx) 673 { 674 util_dynarray_foreach(&sctx->resident_tex_handles, 675 struct si_texture_handle *, tex_handle) { 676 struct pipe_sampler_view *view; 677 struct r600_texture *tex; 678 679 view = (*tex_handle)->view; 680 if (view->texture->target == PIPE_BUFFER) 681 continue; 682 683 tex = (struct r600_texture *)view->texture; 684 685 si_check_render_feedback_texture(sctx, tex, 686 view->u.tex.first_level, 687 view->u.tex.last_level, 688 view->u.tex.first_layer, 689 view->u.tex.last_layer); 690 } 691 } 692 693 static void si_check_render_feedback_resident_images(struct si_context *sctx) 694 { 695 util_dynarray_foreach(&sctx->resident_img_handles, 696 struct si_image_handle *, img_handle) { 697 struct pipe_image_view *view; 698 struct r600_texture *tex; 699 700 view = &(*img_handle)->view; 701 if (view->resource->target == PIPE_BUFFER) 702 continue; 703 704 tex = (struct r600_texture *)view->resource; 705 706 si_check_render_feedback_texture(sctx, tex, 707 view->u.tex.level, 708 view->u.tex.level, 709 view->u.tex.first_layer, 710 view->u.tex.last_layer); 711 } 712 } 713 714 static void si_check_render_feedback(struct si_context *sctx) 715 { 716 717 if (!sctx->need_check_render_feedback) 718 return; 719 720 for (int i = 0; i < SI_NUM_SHADERS; ++i) { 721 si_check_render_feedback_images(sctx, &sctx->images[i]); 722 si_check_render_feedback_textures(sctx, &sctx->samplers[i]); 723 } 724 725 si_check_render_feedback_resident_images(sctx); 726 si_check_render_feedback_resident_textures(sctx); 727 728 sctx->need_check_render_feedback = false; 729 } 730 731 static void si_decompress_resident_textures(struct si_context *sctx) 732 { 733 util_dynarray_foreach(&sctx->resident_tex_needs_color_decompress, 734 struct si_texture_handle *, tex_handle) { 735 struct pipe_sampler_view *view = (*tex_handle)->view; 736 struct r600_texture *tex = (struct r600_texture *)view->texture; 737 738 si_decompress_color_texture(sctx, tex, view->u.tex.first_level, 739 view->u.tex.last_level); 740 } 741 742 util_dynarray_foreach(&sctx->resident_tex_needs_depth_decompress, 743 struct si_texture_handle *, tex_handle) { 744 struct pipe_sampler_view *view = (*tex_handle)->view; 745 struct si_sampler_view *sview = (struct si_sampler_view *)view; 746 struct r600_texture *tex = (struct r600_texture *)view->texture; 747 748 si_decompress_depth(sctx, tex, 749 sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z, 750 view->u.tex.first_level, view->u.tex.last_level, 751 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); 752 } 753 } 754 755 static void si_decompress_resident_images(struct si_context *sctx) 756 { 757 util_dynarray_foreach(&sctx->resident_img_needs_color_decompress, 758 struct si_image_handle *, img_handle) { 759 struct pipe_image_view *view = &(*img_handle)->view; 760 struct r600_texture *tex = (struct r600_texture *)view->resource; 761 762 si_decompress_color_texture(sctx, tex, view->u.tex.level, 763 view->u.tex.level); 764 } 765 } 766 767 void si_decompress_textures(struct si_context *sctx, unsigned shader_mask) 768 { 769 unsigned compressed_colortex_counter, mask; 770 771 if (sctx->blitter->running) 772 return; 773 774 /* Update the compressed_colortex_mask if necessary. */ 775 compressed_colortex_counter = p_atomic_read(&sctx->screen->compressed_colortex_counter); 776 if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) { 777 sctx->b.last_compressed_colortex_counter = compressed_colortex_counter; 778 si_update_needs_color_decompress_masks(sctx); 779 } 780 781 /* Decompress color & depth textures if needed. */ 782 mask = sctx->shader_needs_decompress_mask & shader_mask; 783 while (mask) { 784 unsigned i = u_bit_scan(&mask); 785 786 if (sctx->samplers[i].needs_depth_decompress_mask) { 787 si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]); 788 } 789 if (sctx->samplers[i].needs_color_decompress_mask) { 790 si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]); 791 } 792 if (sctx->images[i].needs_color_decompress_mask) { 793 si_decompress_image_color_textures(sctx, &sctx->images[i]); 794 } 795 } 796 797 if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) { 798 if (sctx->uses_bindless_samplers) 799 si_decompress_resident_textures(sctx); 800 if (sctx->uses_bindless_images) 801 si_decompress_resident_images(sctx); 802 } else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) { 803 if (sctx->cs_shader_state.program->uses_bindless_samplers) 804 si_decompress_resident_textures(sctx); 805 if (sctx->cs_shader_state.program->uses_bindless_images) 806 si_decompress_resident_images(sctx); 807 } 808 809 si_check_render_feedback(sctx); 810 } 811 812 /* Helper for decompressing a portion of a color or depth resource before 813 * blitting if any decompression is needed. 814 * The driver doesn't decompress resources automatically while u_blitter is 815 * rendering. */ 816 static void si_decompress_subresource(struct pipe_context *ctx, 817 struct pipe_resource *tex, 818 unsigned planes, unsigned level, 819 unsigned first_layer, unsigned last_layer) 820 { 821 struct si_context *sctx = (struct si_context *)ctx; 822 struct r600_texture *rtex = (struct r600_texture*)tex; 823 824 if (rtex->db_compatible) { 825 planes &= PIPE_MASK_Z | PIPE_MASK_S; 826 827 if (!rtex->surface.has_stencil) 828 planes &= ~PIPE_MASK_S; 829 830 /* If we've rendered into the framebuffer and it's a blitting 831 * source, make sure the decompression pass is invoked 832 * by dirtying the framebuffer. 833 */ 834 if (sctx->framebuffer.state.zsbuf && 835 sctx->framebuffer.state.zsbuf->u.tex.level == level && 836 sctx->framebuffer.state.zsbuf->texture == tex) 837 si_update_fb_dirtiness_after_rendering(sctx); 838 839 si_decompress_depth(sctx, rtex, planes, 840 level, level, 841 first_layer, last_layer); 842 } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) { 843 /* If we've rendered into the framebuffer and it's a blitting 844 * source, make sure the decompression pass is invoked 845 * by dirtying the framebuffer. 846 */ 847 for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 848 if (sctx->framebuffer.state.cbufs[i] && 849 sctx->framebuffer.state.cbufs[i]->u.tex.level == level && 850 sctx->framebuffer.state.cbufs[i]->texture == tex) { 851 si_update_fb_dirtiness_after_rendering(sctx); 852 break; 853 } 854 } 855 856 si_blit_decompress_color(ctx, rtex, level, level, 857 first_layer, last_layer, false); 858 } 859 } 860 861 struct texture_orig_info { 862 unsigned format; 863 unsigned width0; 864 unsigned height0; 865 unsigned npix_x; 866 unsigned npix_y; 867 unsigned npix0_x; 868 unsigned npix0_y; 869 }; 870 871 void si_resource_copy_region(struct pipe_context *ctx, 872 struct pipe_resource *dst, 873 unsigned dst_level, 874 unsigned dstx, unsigned dsty, unsigned dstz, 875 struct pipe_resource *src, 876 unsigned src_level, 877 const struct pipe_box *src_box) 878 { 879 struct si_context *sctx = (struct si_context *)ctx; 880 struct r600_texture *rsrc = (struct r600_texture*)src; 881 struct pipe_surface *dst_view, dst_templ; 882 struct pipe_sampler_view src_templ, *src_view; 883 unsigned dst_width, dst_height, src_width0, src_height0; 884 unsigned dst_width0, dst_height0, src_force_level = 0; 885 struct pipe_box sbox, dstbox; 886 887 /* Handle buffers first. */ 888 if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { 889 si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, 0); 890 return; 891 } 892 893 assert(u_max_sample(dst) == u_max_sample(src)); 894 895 /* The driver doesn't decompress resources automatically while 896 * u_blitter is rendering. */ 897 si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level, 898 src_box->z, src_box->z + src_box->depth - 1); 899 900 dst_width = u_minify(dst->width0, dst_level); 901 dst_height = u_minify(dst->height0, dst_level); 902 dst_width0 = dst->width0; 903 dst_height0 = dst->height0; 904 src_width0 = src->width0; 905 src_height0 = src->height0; 906 907 util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz); 908 util_blitter_default_src_texture(sctx->blitter, &src_templ, src, src_level); 909 910 if (util_format_is_compressed(src->format) || 911 util_format_is_compressed(dst->format)) { 912 unsigned blocksize = rsrc->surface.bpe; 913 914 if (blocksize == 8) 915 src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */ 916 else 917 src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */ 918 dst_templ.format = src_templ.format; 919 920 dst_width = util_format_get_nblocksx(dst->format, dst_width); 921 dst_height = util_format_get_nblocksy(dst->format, dst_height); 922 dst_width0 = util_format_get_nblocksx(dst->format, dst_width0); 923 dst_height0 = util_format_get_nblocksy(dst->format, dst_height0); 924 src_width0 = util_format_get_nblocksx(src->format, src_width0); 925 src_height0 = util_format_get_nblocksy(src->format, src_height0); 926 927 dstx = util_format_get_nblocksx(dst->format, dstx); 928 dsty = util_format_get_nblocksy(dst->format, dsty); 929 930 sbox.x = util_format_get_nblocksx(src->format, src_box->x); 931 sbox.y = util_format_get_nblocksy(src->format, src_box->y); 932 sbox.z = src_box->z; 933 sbox.width = util_format_get_nblocksx(src->format, src_box->width); 934 sbox.height = util_format_get_nblocksy(src->format, src_box->height); 935 sbox.depth = src_box->depth; 936 src_box = &sbox; 937 938 src_force_level = src_level; 939 } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) { 940 if (util_format_is_subsampled_422(src->format)) { 941 src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; 942 dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; 943 944 dst_width = util_format_get_nblocksx(dst->format, dst_width); 945 dst_width0 = util_format_get_nblocksx(dst->format, dst_width0); 946 src_width0 = util_format_get_nblocksx(src->format, src_width0); 947 948 dstx = util_format_get_nblocksx(dst->format, dstx); 949 950 sbox = *src_box; 951 sbox.x = util_format_get_nblocksx(src->format, src_box->x); 952 sbox.width = util_format_get_nblocksx(src->format, src_box->width); 953 src_box = &sbox; 954 } else { 955 unsigned blocksize = rsrc->surface.bpe; 956 957 switch (blocksize) { 958 case 1: 959 dst_templ.format = PIPE_FORMAT_R8_UNORM; 960 src_templ.format = PIPE_FORMAT_R8_UNORM; 961 break; 962 case 2: 963 dst_templ.format = PIPE_FORMAT_R8G8_UNORM; 964 src_templ.format = PIPE_FORMAT_R8G8_UNORM; 965 break; 966 case 4: 967 dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; 968 src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; 969 break; 970 case 8: 971 dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; 972 src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; 973 break; 974 case 16: 975 dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; 976 src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; 977 break; 978 default: 979 fprintf(stderr, "Unhandled format %s with blocksize %u\n", 980 util_format_short_name(src->format), blocksize); 981 assert(0); 982 } 983 } 984 } 985 986 /* SNORM8 blitting has precision issues on some chips. Use the SINT 987 * equivalent instead, which doesn't force DCC decompression. 988 * Note that some chips avoid this issue by using SDMA. 989 */ 990 if (util_format_is_snorm8(dst_templ.format)) { 991 switch (dst_templ.format) { 992 case PIPE_FORMAT_R8_SNORM: 993 dst_templ.format = src_templ.format = PIPE_FORMAT_R8_SINT; 994 break; 995 case PIPE_FORMAT_R8G8_SNORM: 996 dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8_SINT; 997 break; 998 case PIPE_FORMAT_R8G8B8X8_SNORM: 999 dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8X8_SINT; 1000 break; 1001 case PIPE_FORMAT_R8G8B8A8_SNORM: 1002 /* There are no SINT variants for ABGR and XBGR, so we have to use RGBA. */ 1003 case PIPE_FORMAT_A8B8G8R8_SNORM: 1004 case PIPE_FORMAT_X8B8G8R8_SNORM: 1005 dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8A8_SINT; 1006 break; 1007 case PIPE_FORMAT_A8_SNORM: 1008 dst_templ.format = src_templ.format = PIPE_FORMAT_A8_SINT; 1009 break; 1010 case PIPE_FORMAT_L8_SNORM: 1011 dst_templ.format = src_templ.format = PIPE_FORMAT_L8_SINT; 1012 break; 1013 case PIPE_FORMAT_L8A8_SNORM: 1014 dst_templ.format = src_templ.format = PIPE_FORMAT_L8A8_SINT; 1015 break; 1016 case PIPE_FORMAT_I8_SNORM: 1017 dst_templ.format = src_templ.format = PIPE_FORMAT_I8_SINT; 1018 break; 1019 default:; /* fall through */ 1020 } 1021 } 1022 1023 vi_disable_dcc_if_incompatible_format(&sctx->b, dst, dst_level, 1024 dst_templ.format); 1025 vi_disable_dcc_if_incompatible_format(&sctx->b, src, src_level, 1026 src_templ.format); 1027 1028 /* Initialize the surface. */ 1029 dst_view = si_create_surface_custom(ctx, dst, &dst_templ, 1030 dst_width0, dst_height0, 1031 dst_width, dst_height); 1032 1033 /* Initialize the sampler view. */ 1034 src_view = si_create_sampler_view_custom(ctx, src, &src_templ, 1035 src_width0, src_height0, 1036 src_force_level); 1037 1038 u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height), 1039 abs(src_box->depth), &dstbox); 1040 1041 /* Copy. */ 1042 si_blitter_begin(ctx, SI_COPY); 1043 util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox, 1044 src_view, src_box, src_width0, src_height0, 1045 PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, 1046 false); 1047 si_blitter_end(ctx); 1048 1049 pipe_surface_reference(&dst_view, NULL); 1050 pipe_sampler_view_reference(&src_view, NULL); 1051 } 1052 1053 static void si_do_CB_resolve(struct si_context *sctx, 1054 const struct pipe_blit_info *info, 1055 struct pipe_resource *dst, 1056 unsigned dst_level, unsigned dst_z, 1057 enum pipe_format format) 1058 { 1059 /* Required before and after CB_RESOLVE. */ 1060 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 1061 1062 si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE | 1063 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1064 util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, 1065 info->src.resource, info->src.box.z, 1066 ~0, sctx->custom_blend_resolve, 1067 format); 1068 si_blitter_end(&sctx->b.b); 1069 1070 /* Flush caches for possible texturing. */ 1071 si_make_CB_shader_coherent(sctx, 1, false); 1072 } 1073 1074 static bool do_hardware_msaa_resolve(struct pipe_context *ctx, 1075 const struct pipe_blit_info *info) 1076 { 1077 struct si_context *sctx = (struct si_context*)ctx; 1078 struct r600_texture *src = (struct r600_texture*)info->src.resource; 1079 struct r600_texture *dst = (struct r600_texture*)info->dst.resource; 1080 MAYBE_UNUSED struct r600_texture *rtmp; 1081 unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); 1082 unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); 1083 enum pipe_format format = info->src.format; 1084 struct pipe_resource *tmp, templ; 1085 struct pipe_blit_info blit; 1086 1087 /* Check basic requirements for hw resolve. */ 1088 if (!(info->src.resource->nr_samples > 1 && 1089 info->dst.resource->nr_samples <= 1 && 1090 !util_format_is_pure_integer(format) && 1091 !util_format_is_depth_or_stencil(format) && 1092 util_max_layer(info->src.resource, 0) == 0)) 1093 return false; 1094 1095 /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and 1096 * the format is R16G16. Use R16A16, which does work. 1097 */ 1098 if (format == PIPE_FORMAT_R16G16_UNORM) 1099 format = PIPE_FORMAT_R16A16_UNORM; 1100 if (format == PIPE_FORMAT_R16G16_SNORM) 1101 format = PIPE_FORMAT_R16A16_SNORM; 1102 1103 /* Check the remaining requirements for hw resolve. */ 1104 if (util_max_layer(info->dst.resource, info->dst.level) == 0 && 1105 !info->scissor_enable && 1106 (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA && 1107 util_is_format_compatible(util_format_description(info->src.format), 1108 util_format_description(info->dst.format)) && 1109 dst_width == info->src.resource->width0 && 1110 dst_height == info->src.resource->height0 && 1111 info->dst.box.x == 0 && 1112 info->dst.box.y == 0 && 1113 info->dst.box.width == dst_width && 1114 info->dst.box.height == dst_height && 1115 info->dst.box.depth == 1 && 1116 info->src.box.x == 0 && 1117 info->src.box.y == 0 && 1118 info->src.box.width == dst_width && 1119 info->src.box.height == dst_height && 1120 info->src.box.depth == 1 && 1121 !dst->surface.is_linear && 1122 (!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */ 1123 /* Check the last constraint. */ 1124 if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) { 1125 /* The next fast clear will switch to this mode to 1126 * get direct hw resolve next time if the mode is 1127 * different now. 1128 */ 1129 src->last_msaa_resolve_target_micro_mode = 1130 dst->surface.micro_tile_mode; 1131 goto resolve_to_temp; 1132 } 1133 1134 /* Resolving into a surface with DCC is unsupported. Since 1135 * it's being overwritten anyway, clear it to uncompressed. 1136 * This is still the fastest codepath even with this clear. 1137 */ 1138 if (vi_dcc_enabled(dst, info->dst.level)) { 1139 /* TODO: Implement per-level DCC clears for GFX9. */ 1140 if (sctx->b.chip_class >= GFX9 && 1141 info->dst.resource->last_level != 0) 1142 goto resolve_to_temp; 1143 1144 vi_dcc_clear_level(sctx, dst, info->dst.level, 1145 0xFFFFFFFF); 1146 dst->dirty_level_mask &= ~(1 << info->dst.level); 1147 } 1148 1149 /* Resolve directly from src to dst. */ 1150 si_do_CB_resolve(sctx, info, info->dst.resource, 1151 info->dst.level, info->dst.box.z, format); 1152 return true; 1153 } 1154 1155 resolve_to_temp: 1156 /* Shader-based resolve is VERY SLOW. Instead, resolve into 1157 * a temporary texture and blit. 1158 */ 1159 memset(&templ, 0, sizeof(templ)); 1160 templ.target = PIPE_TEXTURE_2D; 1161 templ.format = info->src.resource->format; 1162 templ.width0 = info->src.resource->width0; 1163 templ.height0 = info->src.resource->height0; 1164 templ.depth0 = 1; 1165 templ.array_size = 1; 1166 templ.usage = PIPE_USAGE_DEFAULT; 1167 templ.flags = R600_RESOURCE_FLAG_FORCE_TILING | 1168 R600_RESOURCE_FLAG_DISABLE_DCC; 1169 1170 /* The src and dst microtile modes must be the same. */ 1171 if (src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY) 1172 templ.bind = PIPE_BIND_SCANOUT; 1173 else 1174 templ.bind = 0; 1175 1176 tmp = ctx->screen->resource_create(ctx->screen, &templ); 1177 if (!tmp) 1178 return false; 1179 rtmp = (struct r600_texture*)tmp; 1180 1181 assert(!rtmp->surface.is_linear); 1182 assert(src->surface.micro_tile_mode == rtmp->surface.micro_tile_mode); 1183 1184 /* resolve */ 1185 si_do_CB_resolve(sctx, info, tmp, 0, 0, format); 1186 1187 /* blit */ 1188 blit = *info; 1189 blit.src.resource = tmp; 1190 blit.src.box.z = 0; 1191 1192 si_blitter_begin(ctx, SI_BLIT | 1193 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1194 util_blitter_blit(sctx->blitter, &blit); 1195 si_blitter_end(ctx); 1196 1197 pipe_resource_reference(&tmp, NULL); 1198 return true; 1199 } 1200 1201 static void si_blit(struct pipe_context *ctx, 1202 const struct pipe_blit_info *info) 1203 { 1204 struct si_context *sctx = (struct si_context*)ctx; 1205 struct r600_texture *rdst = (struct r600_texture *)info->dst.resource; 1206 1207 if (do_hardware_msaa_resolve(ctx, info)) { 1208 return; 1209 } 1210 1211 /* Using SDMA for copying to a linear texture in GTT is much faster. 1212 * This improves DRI PRIME performance. 1213 * 1214 * resource_copy_region can't do this yet, because dma_copy calls it 1215 * on failure (recursion). 1216 */ 1217 if (rdst->surface.is_linear && 1218 sctx->b.dma_copy && 1219 util_can_blit_via_copy_region(info, false)) { 1220 sctx->b.dma_copy(ctx, info->dst.resource, info->dst.level, 1221 info->dst.box.x, info->dst.box.y, 1222 info->dst.box.z, 1223 info->src.resource, info->src.level, 1224 &info->src.box); 1225 return; 1226 } 1227 1228 assert(util_blitter_is_blit_supported(sctx->blitter, info)); 1229 1230 /* The driver doesn't decompress resources automatically while 1231 * u_blitter is rendering. */ 1232 vi_disable_dcc_if_incompatible_format(&sctx->b, info->src.resource, 1233 info->src.level, 1234 info->src.format); 1235 vi_disable_dcc_if_incompatible_format(&sctx->b, info->dst.resource, 1236 info->dst.level, 1237 info->dst.format); 1238 si_decompress_subresource(ctx, info->src.resource, info->mask, 1239 info->src.level, 1240 info->src.box.z, 1241 info->src.box.z + info->src.box.depth - 1); 1242 1243 if (sctx->screen->debug_flags & DBG(FORCE_DMA) && 1244 util_try_blit_via_copy_region(ctx, info)) 1245 return; 1246 1247 si_blitter_begin(ctx, SI_BLIT | 1248 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); 1249 util_blitter_blit(sctx->blitter, info); 1250 si_blitter_end(ctx); 1251 } 1252 1253 static boolean si_generate_mipmap(struct pipe_context *ctx, 1254 struct pipe_resource *tex, 1255 enum pipe_format format, 1256 unsigned base_level, unsigned last_level, 1257 unsigned first_layer, unsigned last_layer) 1258 { 1259 struct si_context *sctx = (struct si_context*)ctx; 1260 struct r600_texture *rtex = (struct r600_texture *)tex; 1261 1262 if (!util_blitter_is_copy_supported(sctx->blitter, tex, tex)) 1263 return false; 1264 1265 /* The driver doesn't decompress resources automatically while 1266 * u_blitter is rendering. */ 1267 vi_disable_dcc_if_incompatible_format(&sctx->b, tex, base_level, 1268 format); 1269 si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS, 1270 base_level, first_layer, last_layer); 1271 1272 /* Clear dirty_level_mask for the levels that will be overwritten. */ 1273 assert(base_level < last_level); 1274 rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1, 1275 last_level - base_level); 1276 1277 sctx->generate_mipmap_for_depth = rtex->is_depth; 1278 1279 si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND); 1280 util_blitter_generate_mipmap(sctx->blitter, tex, format, 1281 base_level, last_level, 1282 first_layer, last_layer); 1283 si_blitter_end(ctx); 1284 1285 sctx->generate_mipmap_for_depth = false; 1286 return true; 1287 } 1288 1289 static void si_flush_resource(struct pipe_context *ctx, 1290 struct pipe_resource *res) 1291 { 1292 struct r600_texture *rtex = (struct r600_texture*)res; 1293 1294 assert(res->target != PIPE_BUFFER); 1295 assert(!rtex->dcc_separate_buffer || rtex->dcc_gather_statistics); 1296 1297 /* st/dri calls flush twice per frame (not a bug), this prevents double 1298 * decompression. */ 1299 if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty) 1300 return; 1301 1302 if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) { 1303 si_blit_decompress_color(ctx, rtex, 0, res->last_level, 1304 0, util_max_layer(res, 0), 1305 rtex->dcc_separate_buffer != NULL); 1306 } 1307 1308 /* Always do the analysis even if DCC is disabled at the moment. */ 1309 if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) { 1310 rtex->separate_dcc_dirty = false; 1311 vi_separate_dcc_process_and_reset_stats(ctx, rtex); 1312 } 1313 } 1314 1315 static void si_decompress_dcc(struct pipe_context *ctx, 1316 struct r600_texture *rtex) 1317 { 1318 if (!rtex->dcc_offset) 1319 return; 1320 1321 si_blit_decompress_color(ctx, rtex, 0, rtex->resource.b.b.last_level, 1322 0, util_max_layer(&rtex->resource.b.b, 0), 1323 true); 1324 } 1325 1326 void si_init_blit_functions(struct si_context *sctx) 1327 { 1328 sctx->b.b.resource_copy_region = si_resource_copy_region; 1329 sctx->b.b.blit = si_blit; 1330 sctx->b.b.flush_resource = si_flush_resource; 1331 sctx->b.b.generate_mipmap = si_generate_mipmap; 1332 sctx->b.blit_decompress_depth = si_blit_decompress_depth; 1333 sctx->b.decompress_dcc = si_decompress_dcc; 1334 } 1335