1 /* 2 * Copyright 2010 Jerome Glisse <glisse (at) freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */

#include "si_pipe.h"
#include "util/u_format.h"
#include "util/u_surface.h"

/* Bitmask telling si_blitter_begin which pieces of pipe state to save
 * (u_blitter restores them afterwards) around an internal blit, and
 * whether to force the render condition off for the operation. */
enum si_blitter_op /* bitmask */
{
	SI_SAVE_TEXTURES = 1,
	SI_SAVE_FRAMEBUFFER = 2,
	SI_SAVE_FRAGMENT_STATE = 4,
	SI_DISABLE_RENDER_COND = 8,

	SI_CLEAR = SI_SAVE_FRAGMENT_STATE,

	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,

	SI_COPY = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
		  SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,

	SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
		  SI_SAVE_FRAGMENT_STATE,

	SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE |
			SI_DISABLE_RENDER_COND,

	SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE
};

/* Save the state categories selected by "op" into u_blitter before an
 * internal blit.  Vertex/tessellation/geometry/streamout state is always
 * saved; fragment, framebuffer and texture state only on request. */
static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
{
	struct si_context *sctx = (struct si_context *)ctx;

	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
	util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
	util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
	util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
				     (struct pipe_stream_output_target**)sctx->b.streamout.targets);
	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);

	if (op & SI_SAVE_FRAGMENT_STATE) {
		util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
		util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
		util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
		util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
		util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
		util_blitter_save_viewport(sctx->blitter, &sctx->b.viewports.states[0]);
		util_blitter_save_scissor(sctx->blitter, &sctx->b.scissors.states[0]);
	}

	if (op & SI_SAVE_FRAMEBUFFER)
		util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);

	if (op & SI_SAVE_TEXTURES) {
		/* Only the first two fragment sampler slots are saved/used
		 * by u_blitter operations here. */
		util_blitter_save_fragment_sampler_states(
			sctx->blitter, 2,
			(void**)sctx->samplers[PIPE_SHADER_FRAGMENT].views.sampler_states);

		util_blitter_save_fragment_sampler_views(sctx->blitter, 2,
			sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
	}

	if (op & SI_DISABLE_RENDER_COND)
		sctx->b.render_cond_force_off = true;
}

/* Counterpart of si_blitter_begin: drop the render-condition override.
 * (u_blitter itself restores the saved state.) */
static void si_blitter_end(struct pipe_context *ctx)
{
	struct si_context *sctx = (struct si_context *)ctx;

	sctx->b.render_cond_force_off = false;
}

/* Highest sample index of a resource (0 for single-sampled). */
static unsigned u_max_sample(struct pipe_resource *r)
{
	return r->nr_samples ? r->nr_samples - 1 : 0;
}

/* Copy Z and/or S planes from "src" to "dst" via the DB->CB copy path,
 * one layer/sample at a time, for every level in "level_mask".
 * Returns the mask of levels that were copied in full (all layers and
 * samples), i.e. levels that can be marked clean by the caller. */
static unsigned
si_blit_dbcb_copy(struct si_context *sctx,
		  struct r600_texture *src,
		  struct r600_texture *dst,
		  unsigned planes, unsigned level_mask,
		  unsigned first_layer, unsigned last_layer,
		  unsigned first_sample, unsigned last_sample)
{
	struct pipe_surface surf_tmpl = {{0}};
	unsigned layer, sample, checked_last_layer, max_layer;
	unsigned fully_copied_levels = 0;

	if (planes & PIPE_MASK_Z)
		sctx->dbcb_depth_copy_enabled = true;
	if (planes & PIPE_MASK_S)
		sctx->dbcb_stencil_copy_enabled = true;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		/* The smaller the mipmap level, the less layers there are
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&src->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		surf_tmpl.u.tex.level = level;

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			struct pipe_surface *zsurf, *cbsurf;

			surf_tmpl.format = src->resource.b.b.format;
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;

			zsurf = sctx->b.b.create_surface(&sctx->b.b, &src->resource.b.b, &surf_tmpl);

			surf_tmpl.format = dst->resource.b.b.format;
			cbsurf = sctx->b.b.create_surface(&sctx->b.b, &dst->resource.b.b, &surf_tmpl);

			for (sample = first_sample; sample <= last_sample; sample++) {
				/* Re-emit DB render state whenever the copied
				 * sample index changes. */
				if (sample != sctx->dbcb_copy_sample) {
					sctx->dbcb_copy_sample = sample;
					si_mark_atom_dirty(sctx, &sctx->db_render_state);
				}

				si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
				util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
								  sctx->custom_dsa_flush, 1.0f);
				si_blitter_end(&sctx->b.b);
			}

			pipe_surface_reference(&zsurf, NULL);
			pipe_surface_reference(&cbsurf, NULL);
		}

		/* Record levels whose full layer and sample range was copied. */
		if (first_layer == 0 && last_layer >= max_layer &&
		    first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
			fully_copied_levels |= 1u << level;
	}

	sctx->dbcb_depth_copy_enabled = false;
	sctx->dbcb_stencil_copy_enabled = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	return fully_copied_levels;
}

/* Decompress depth/stencil from "texture" into the separate "staging"
 * texture (for hardware that can't sample compressed Z/S directly).
 * Which planes are copied is derived from the staging format. */
static void si_blit_decompress_depth(struct pipe_context *ctx,
				     struct r600_texture *texture,
				     struct r600_texture *staging,
				     unsigned first_level, unsigned last_level,
				     unsigned first_layer, unsigned last_layer,
				     unsigned first_sample, unsigned last_sample)
{
	const struct util_format_description *desc;
	unsigned planes = 0;

	assert(staging != NULL && "use si_blit_decompress_zs_in_place instead");

	desc =
util_format_description(staging->resource.b.b.format);

	if (util_format_has_depth(desc))
		planes |= PIPE_MASK_Z;
	if (util_format_has_stencil(desc))
		planes |= PIPE_MASK_S;

	si_blit_dbcb_copy(
		(struct si_context *)ctx, texture, staging, planes,
		u_bit_consecutive(first_level, last_level - first_level + 1),
		first_layer, last_layer, first_sample, last_sample);
}

/* Helper function for si_blit_decompress_zs_in_place.
 * Decompress the given Z and/or S planes in place by rendering each
 * layer of each requested level with the custom flush DSA state. */
static void
si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
				      struct r600_texture *texture,
				      unsigned planes, unsigned level_mask,
				      unsigned first_layer, unsigned last_layer)
{
	struct pipe_surface *zsurf, surf_tmpl = {{0}};
	unsigned layer, max_layer, checked_last_layer;
	unsigned fully_decompressed_mask = 0;

	if (!level_mask)
		return;

	if (planes & PIPE_MASK_S)
		sctx->db_flush_stencil_inplace = true;
	if (planes & PIPE_MASK_Z)
		sctx->db_flush_depth_inplace = true;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	surf_tmpl.format = texture->resource.b.b.format;

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		surf_tmpl.u.tex.level = level;

		/* The smaller the mipmap level, the less layers there are
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&texture->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;

			zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b, &surf_tmpl);

			si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
			util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
							  sctx->custom_dsa_flush,
							  1.0f);
			si_blitter_end(&sctx->b.b);

			pipe_surface_reference(&zsurf, NULL);
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case occurs often though. */
		if (first_layer == 0 && last_layer >= max_layer) {
			fully_decompressed_mask |= 1u << level;
		}
	}

	/* Only levels decompressed across their full layer range become clean. */
	if (planes & PIPE_MASK_Z)
		texture->dirty_level_mask &= ~fully_decompressed_mask;
	if (planes & PIPE_MASK_S)
		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;

	sctx->db_flush_depth_inplace = false;
	sctx->db_flush_stencil_inplace = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}

/* Helper function of si_flush_depth_texture: decompress the given levels
 * of Z and/or S planes in place.
 */
static void
si_blit_decompress_zs_in_place(struct si_context *sctx,
			       struct r600_texture *texture,
			       unsigned levels_z, unsigned levels_s,
			       unsigned first_layer, unsigned last_layer)
{
	unsigned both = levels_z & levels_s;

	/* First, do combined Z & S decompresses for levels that need it. */
	if (both) {
		si_blit_decompress_zs_planes_in_place(
				sctx, texture, PIPE_MASK_Z | PIPE_MASK_S,
				both,
				first_layer, last_layer);
		levels_z &= ~both;
		levels_s &= ~both;
	}

	/* Now do separate Z and S decompresses. */
	if (levels_z) {
		si_blit_decompress_zs_planes_in_place(
				sctx, texture, PIPE_MASK_Z,
				levels_z,
				first_layer, last_layer);
	}

	if (levels_s) {
		si_blit_decompress_zs_planes_in_place(
				sctx, texture, PIPE_MASK_S,
				levels_s,
				first_layer, last_layer);
	}
}

/* Make the requested Z/S planes of "tex" sampleable over the given
 * level/layer range: decompress in place where the hardware can sample
 * compressed Z/S, otherwise copy into a flushed (decompressed) texture. */
static void
si_flush_depth_texture(struct si_context *sctx,
		       struct r600_texture *tex,
		       unsigned required_planes,
		       unsigned first_level, unsigned last_level,
		       unsigned first_layer, unsigned last_layer)
{
	unsigned inplace_planes = 0;
	unsigned copy_planes = 0;
	unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1);
	unsigned levels_z = 0;
	unsigned levels_s = 0;

	if (required_planes & PIPE_MASK_Z) {
		levels_z = level_mask & tex->dirty_level_mask;

		if (levels_z) {
			if (r600_can_sample_zs(tex, false))
				inplace_planes |= PIPE_MASK_Z;
			else
				copy_planes |= PIPE_MASK_Z;
		}
	}
	if (required_planes & PIPE_MASK_S) {
		levels_s = level_mask & tex->stencil_dirty_level_mask;

		if (levels_s) {
			if (r600_can_sample_zs(tex, true))
				inplace_planes |= PIPE_MASK_S;
			else
				copy_planes |= PIPE_MASK_S;
		}
	}

	/* TC-compatible HTILE depth never becomes dirty (no Z decompress needed). */
	assert(!tex->tc_compatible_htile || levels_z == 0);

	/* We may have to allocate the flushed texture here when called from
	 * si_decompress_subresource.
 */
	if (copy_planes &&
	    (tex->flushed_depth_texture ||
	     r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
		struct r600_texture *dst = tex->flushed_depth_texture;
		unsigned fully_copied_levels;
		unsigned levels = 0;

		assert(tex->flushed_depth_texture);

		/* A combined Z/S destination forces copying both planes. */
		if (util_format_is_depth_and_stencil(dst->resource.b.b.format))
			copy_planes = PIPE_MASK_Z | PIPE_MASK_S;

		if (copy_planes & PIPE_MASK_Z) {
			levels |= levels_z;
			levels_z = 0;
		}
		if (copy_planes & PIPE_MASK_S) {
			levels |= levels_s;
			levels_s = 0;
		}

		fully_copied_levels = si_blit_dbcb_copy(
			sctx, tex, dst, copy_planes, levels,
			first_layer, last_layer,
			0, u_max_sample(&tex->resource.b.b));

		if (copy_planes & PIPE_MASK_Z)
			tex->dirty_level_mask &= ~fully_copied_levels;
		if (copy_planes & PIPE_MASK_S)
			tex->stencil_dirty_level_mask &= ~fully_copied_levels;
	}

	if (inplace_planes) {
		si_blit_decompress_zs_in_place(
			sctx, tex,
			levels_z, levels_s,
			first_layer, last_layer);
	}
}

/* Flush every bound depth texture in "textures" that is dirty, choosing
 * the plane (Z or S) from how the sampler view samples it. */
static void
si_flush_depth_textures(struct si_context *sctx,
			struct si_textures_info *textures)
{
	unsigned i;
	unsigned mask = textures->depth_texture_mask;

	while (mask) {
		struct pipe_sampler_view *view;
		struct si_sampler_view *sview;
		struct r600_texture *tex;

		i = u_bit_scan(&mask);

		view = textures->views.views[i];
		assert(view);
		sview = (struct si_sampler_view*)view;

		tex = (struct r600_texture *)view->texture;
		assert(tex->db_compatible);

		si_flush_depth_texture(
			sctx, tex,
			sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
			view->u.tex.first_level, view->u.tex.last_level,
			0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
	}
}

/* Decompress color data (FMASK/CMASK fast-clear expansion, or full DCC
 * decompression when need_dcc_decompress is set) by rendering each dirty
 * level/layer with the matching custom blend state. */
static void si_blit_decompress_color(struct pipe_context *ctx,
		struct r600_texture *rtex,
		unsigned first_level, unsigned last_level,
		unsigned first_layer, unsigned last_layer,
		bool need_dcc_decompress)
{
	struct si_context *sctx = (struct si_context *)ctx;
	void* custom_blend;
	unsigned layer, checked_last_layer, max_layer;
	unsigned level_mask =
		u_bit_consecutive(first_level, last_level - first_level + 1);

	/* DCC decompress runs even on clean levels; otherwise only dirty ones. */
	if (!need_dcc_decompress)
		level_mask &= rtex->dirty_level_mask;
	if (!level_mask)
		return;

	if (rtex->dcc_offset && need_dcc_decompress) {
		custom_blend = sctx->custom_blend_dcc_decompress;

		/* disable levels without DCC */
		for (int i = first_level; i <= last_level; i++) {
			if (!rtex->dcc_offset ||
			    i >= rtex->surface.num_dcc_levels)
				level_mask &= ~(1 << i);
		}
	} else if (rtex->fmask.size) {
		custom_blend = sctx->custom_blend_decompress;
	} else {
		custom_blend = sctx->custom_blend_fastclear;
	}

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		/* The smaller the mipmap level, the less layers there are
		 * as far as 3D textures are concerned.
 */
		max_layer = util_max_layer(&rtex->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			struct pipe_surface *cbsurf, surf_tmpl;

			surf_tmpl.format = rtex->resource.b.b.format;
			surf_tmpl.u.tex.level = level;
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;
			cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);

			si_blitter_begin(ctx, SI_DECOMPRESS);
			util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
			si_blitter_end(ctx);

			pipe_surface_reference(&cbsurf, NULL);
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case occurs often though. */
		if (first_layer == 0 && last_layer >= max_layer) {
			rtex->dirty_level_mask &= ~(1 << level);
		}
	}
}

/* Decompress every compressed color texture bound as a sampler view
 * in "textures". */
static void
si_decompress_sampler_color_textures(struct si_context *sctx,
				     struct si_textures_info *textures)
{
	unsigned i;
	unsigned mask = textures->compressed_colortex_mask;

	while (mask) {
		struct pipe_sampler_view *view;
		struct r600_texture *tex;

		i = u_bit_scan(&mask);

		view = textures->views.views[i];
		assert(view);

		tex = (struct r600_texture *)view->texture;
		/* CMASK or DCC can be discarded and we can still end up here. */
		if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
			continue;

		si_blit_decompress_color(&sctx->b.b, tex,
				view->u.tex.first_level, view->u.tex.last_level,
				0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level),
				false);
	}
}

/* Decompress every compressed color texture bound as a shader image
 * in "images" (single level per image view). */
static void
si_decompress_image_color_textures(struct si_context *sctx,
				   struct si_images_info *images)
{
	unsigned i;
	unsigned mask = images->compressed_colortex_mask;

	while (mask) {
		const struct pipe_image_view *view;
		struct r600_texture *tex;

		i = u_bit_scan(&mask);

		view = &images->views[i];
		assert(view->resource->target != PIPE_BUFFER);

		tex = (struct r600_texture *)view->resource;
		/* CMASK or DCC can be discarded and we can still end up here. */
		if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
			continue;

		si_blit_decompress_color(&sctx->b.b, tex,
					 view->u.tex.level, view->u.tex.level,
					 0, util_max_layer(&tex->resource.b.b, view->u.tex.level),
					 false);
	}
}

/* Disable DCC on any sampled texture that is simultaneously bound as a
 * color buffer with overlapping level/layer ranges (render feedback loop). */
static void si_check_render_feedback_textures(struct si_context *sctx,
					      struct si_textures_info *textures)
{
	uint32_t mask = textures->views.enabled_mask;

	while (mask) {
		const struct pipe_sampler_view *view;
		struct r600_texture *tex;
		bool render_feedback = false;

		unsigned i = u_bit_scan(&mask);

		view = textures->views.views[i];
		if(view->texture->target == PIPE_BUFFER)
			continue;

		tex = (struct r600_texture *)view->texture;
		if (!tex->dcc_offset)
			continue;

		for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) {
			struct r600_surface * surf;

			if (!sctx->framebuffer.state.cbufs[j])
				continue;

			surf = (struct r600_surface*)sctx->framebuffer.state.cbufs[j];

			/* Same texture, and the bound level/layer ranges overlap. */
			if (tex == (struct r600_texture*)surf->base.texture &&
			    surf->base.u.tex.level >= view->u.tex.first_level &&
			    surf->base.u.tex.level <= view->u.tex.last_level &&
			    surf->base.u.tex.first_layer <= view->u.tex.last_layer &&
			    surf->base.u.tex.last_layer >= view->u.tex.first_layer)
				render_feedback = true;
		}

		if (render_feedback)
			r600_texture_disable_dcc(&sctx->b, tex);
	}
}

/* Same as si_check_render_feedback_textures, but for shader images
 * (which reference exactly one level). */
static void si_check_render_feedback_images(struct si_context *sctx,
					    struct si_images_info *images)
{
	uint32_t mask = images->enabled_mask;

	while (mask) {
		const struct pipe_image_view *view;
		struct r600_texture *tex;
		bool render_feedback = false;

		unsigned i = u_bit_scan(&mask);

		view = &images->views[i];
		if (view->resource->target == PIPE_BUFFER)
			continue;

		tex = (struct r600_texture *)view->resource;
		if (!tex->dcc_offset)
			continue;

		for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) {
			struct r600_surface * surf;

			if (!sctx->framebuffer.state.cbufs[j])
				continue;

			surf = (struct r600_surface*)sctx->framebuffer.state.cbufs[j];

			if (tex == (struct r600_texture*)surf->base.texture &&
			    surf->base.u.tex.level == view->u.tex.level &&
			    surf->base.u.tex.first_layer <= view->u.tex.last_layer &&
			    surf->base.u.tex.last_layer >= view->u.tex.first_layer)
				render_feedback = true;
		}

		if (render_feedback)
			r600_texture_disable_dcc(&sctx->b, tex);
	}
}

/* Run the render-feedback DCC checks for all shader stages, but only
 * when something changed since the last check. */
static void si_check_render_feedback(struct si_context *sctx)
{

	if (!sctx->need_check_render_feedback)
		return;

	for (int i = 0; i < SI_NUM_SHADERS; ++i) {
		si_check_render_feedback_images(sctx, &sctx->images[i]);
		si_check_render_feedback_textures(sctx, &sctx->samplers[i]);
	}
	sctx->need_check_render_feedback = false;
}

/* Decompress all dirty depth/color textures bound to the shader stages in
 * "shader_mask".  No-op while u_blitter itself is running (recursion guard). */
static void si_decompress_textures(struct si_context *sctx, unsigned shader_mask)
{
	unsigned compressed_colortex_counter, mask;

	if (sctx->blitter->running)
		return;

	/* Update the compressed_colortex_mask if necessary.
 */
	compressed_colortex_counter = p_atomic_read(&sctx->screen->b.compressed_colortex_counter);
	if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) {
		sctx->b.last_compressed_colortex_counter = compressed_colortex_counter;
		si_update_compressed_colortex_masks(sctx);
	}

	/* Decompress color & depth textures if needed. */
	mask = sctx->compressed_tex_shader_mask & shader_mask;
	while (mask) {
		unsigned i = u_bit_scan(&mask);

		if (sctx->samplers[i].depth_texture_mask) {
			si_flush_depth_textures(sctx, &sctx->samplers[i]);
		}
		if (sctx->samplers[i].compressed_colortex_mask) {
			si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]);
		}
		if (sctx->images[i].compressed_colortex_mask) {
			si_decompress_image_color_textures(sctx, &sctx->images[i]);
		}
	}

	si_check_render_feedback(sctx);
}

/* Decompress textures bound to any graphics shader stage. */
void si_decompress_graphics_textures(struct si_context *sctx)
{
	si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
}

/* Decompress textures bound to the compute shader stage. */
void si_decompress_compute_textures(struct si_context *sctx)
{
	si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
}

/* pipe_context::clear implementation: try fast color clears and fast
 * (expclear) depth/stencil clears first, then fall back to u_blitter for
 * whatever remains. */
static void si_clear(struct pipe_context *ctx, unsigned buffers,
		     const union pipe_color_union *color,
		     double depth, unsigned stencil)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
	struct pipe_surface *zsbuf = fb->zsbuf;
	struct r600_texture *zstex =
		zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;

	if (buffers & PIPE_CLEAR_COLOR) {
		/* May clear some PIPE_CLEAR_COLORn bits in "buffers" for
		 * buffers it fast-cleared. */
		evergreen_do_fast_color_clear(&sctx->b, fb,
					      &sctx->framebuffer.atom, &buffers,
					      &sctx->framebuffer.dirty_cbufs,
					      color);
		if (!buffers)
			return; /* all buffers have been fast cleared */
	}

	if (buffers & PIPE_CLEAR_COLOR) {
		int i;

		/* These buffers cannot use fast clear, make sure to disable expansion. */
		for (i = 0; i < fb->nr_cbufs; i++) {
			struct r600_texture *tex;

			/* If not clearing this buffer, skip. */
			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
				continue;

			if (!fb->cbufs[i])
				continue;

			tex = (struct r600_texture *)fb->cbufs[i]->texture;
			if (tex->fmask.size == 0)
				tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
		}
	}

	/* Fast depth/stencil clears require HTILE and a clear covering the
	 * whole first level (all layers). */
	if (zstex && zstex->htile_buffer &&
	    zsbuf->u.tex.level == 0 &&
	    zsbuf->u.tex.first_layer == 0 &&
	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
		/* TC-compatible HTILE only supports depth clears to 0 or 1. */
		if (buffers & PIPE_CLEAR_DEPTH &&
		    (!zstex->tc_compatible_htile ||
		     depth == 0 || depth == 1)) {
			/* Need to disable EXPCLEAR temporarily if clearing
			 * to a new value. */
			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
				sctx->db_depth_disable_expclear = true;
			}

			zstex->depth_clear_value = depth;
			sctx->framebuffer.dirty_zsbuf = true;
			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
			sctx->db_depth_clear = true;
			si_mark_atom_dirty(sctx, &sctx->db_render_state);
		}

		/* TC-compatible HTILE only supports stencil clears to 0. */
		if (buffers & PIPE_CLEAR_STENCIL &&
		    (!zstex->tc_compatible_htile || stencil == 0)) {
			stencil &= 0xff;

			/* Need to disable EXPCLEAR temporarily if clearing
			 * to a new value. */
			if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
				sctx->db_stencil_disable_expclear = true;
			}

			zstex->stencil_clear_value = stencil;
			sctx->framebuffer.dirty_zsbuf = true;
			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
			sctx->db_stencil_clear = true;
			si_mark_atom_dirty(sctx, &sctx->db_render_state);
		}
	}

	si_blitter_begin(ctx, SI_CLEAR);
	util_blitter_clear(sctx->blitter, fb->width, fb->height,
			   util_framebuffer_get_num_layers(fb),
			   buffers, color, depth, stencil);
	si_blitter_end(ctx);

	if (sctx->db_depth_clear) {
		sctx->db_depth_clear = false;
		sctx->db_depth_disable_expclear = false;
		zstex->depth_cleared = true;
		si_mark_atom_dirty(sctx, &sctx->db_render_state);
	}

	if (sctx->db_stencil_clear) {
		sctx->db_stencil_clear = false;
		sctx->db_stencil_disable_expclear = false;
		zstex->stencil_cleared = true;
		si_mark_atom_dirty(sctx, &sctx->db_render_state);
	}
}

/* pipe_context::clear_render_target implementation via u_blitter. */
static void si_clear_render_target(struct pipe_context *ctx,
				   struct pipe_surface *dst,
				   const union pipe_color_union *color,
				   unsigned dstx, unsigned dsty,
				   unsigned width, unsigned height,
				   bool render_condition_enabled)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_blitter_begin(ctx, SI_CLEAR_SURFACE |
			 (render_condition_enabled ?
			  0 : SI_DISABLE_RENDER_COND));
	util_blitter_clear_render_target(sctx->blitter, dst, color,
					 dstx, dsty, width, height);
	si_blitter_end(ctx);
}

/* pipe_context::clear_depth_stencil implementation via u_blitter. */
static void si_clear_depth_stencil(struct pipe_context *ctx,
				   struct pipe_surface *dst,
				   unsigned clear_flags,
				   double depth,
				   unsigned stencil,
				   unsigned dstx, unsigned dsty,
				   unsigned width, unsigned height,
				   bool render_condition_enabled)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_blitter_begin(ctx, SI_CLEAR_SURFACE |
			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
	util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil,
					 dstx, dsty, width, height);
	si_blitter_end(ctx);
}

/* Helper for decompressing a portion of a color or depth resource before
 * blitting if any decompression is needed.
 * The driver doesn't decompress resources automatically while u_blitter is
 * rendering. */
static void si_decompress_subresource(struct pipe_context *ctx,
				      struct pipe_resource *tex,
				      unsigned planes, unsigned level,
				      unsigned first_layer, unsigned last_layer)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct r600_texture *rtex = (struct r600_texture*)tex;

	if (rtex->db_compatible) {
		planes &= PIPE_MASK_Z | PIPE_MASK_S;

		/* Drop the stencil plane if the surface has no stencil buffer. */
		if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
			planes &= ~PIPE_MASK_S;

		si_flush_depth_texture(sctx, rtex, planes,
				       level, level,
				       first_layer, last_layer);
	} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
		si_blit_decompress_color(ctx, rtex, level, level,
					 first_layer, last_layer, false);
	}
}

/* Saved texture parameters; NOTE(review): appears unused in the visible
 * part of this file — possibly a leftover. */
struct texture_orig_info {
	unsigned format;
	unsigned width0;
	unsigned height0;
	unsigned npix_x;
	unsigned npix_y;
	unsigned npix0_x;
	unsigned npix0_y;
};

/* pipe_context::resource_copy_region implementation.  Buffers use the DMA
 * copy path; textures are copied with u_blitter, remapping compressed or
 * otherwise unblittable formats to equivalently-sized uint/unorm formats. */
void si_resource_copy_region(struct pipe_context *ctx,
			     struct pipe_resource *dst,
			     unsigned dst_level,
			     unsigned dstx, unsigned dsty, unsigned dstz,
			     struct pipe_resource *src,
			     unsigned src_level,
			     const struct pipe_box *src_box)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct r600_texture *rsrc = (struct r600_texture*)src;
	struct pipe_surface *dst_view, dst_templ;
	struct pipe_sampler_view src_templ, *src_view;
	unsigned dst_width, dst_height, src_width0, src_height0;
	unsigned src_force_level = 0;
	struct pipe_box sbox, dstbox;

	/* Handle buffers first. */
	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, 0);
		return;
	}

	assert(u_max_sample(dst) == u_max_sample(src));

	/* The driver doesn't decompress resources automatically while
	 * u_blitter is rendering. */
	si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level,
				  src_box->z, src_box->z + src_box->depth - 1);

	dst_width = u_minify(dst->width0, dst_level);
	dst_height = u_minify(dst->height0, dst_level);
	src_width0 = src->width0;
	src_height0 = src->height0;

	util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
	util_blitter_default_src_texture(&src_templ, src, src_level);

	if (util_format_is_compressed(src->format) ||
	    util_format_is_compressed(dst->format)) {
		unsigned blocksize = rsrc->surface.bpe;

		/* Copy compressed blocks as raw uint data of the same size. */
		if (blocksize == 8)
			src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
		else
			src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
		dst_templ.format = src_templ.format;

		/* Convert pixel dimensions/coordinates to block units. */
		dst_width = util_format_get_nblocksx(dst->format, dst_width);
		dst_height = util_format_get_nblocksy(dst->format, dst_height);
		src_width0 = util_format_get_nblocksx(src->format, src_width0);
		src_height0 = util_format_get_nblocksy(src->format, src_height0);

		dstx = util_format_get_nblocksx(dst->format, dstx);
		dsty = util_format_get_nblocksy(dst->format, dsty);

		sbox.x = util_format_get_nblocksx(src->format, src_box->x);
		sbox.y = util_format_get_nblocksy(src->format, src_box->y);
		sbox.z = src_box->z;
		sbox.width = util_format_get_nblocksx(src->format, src_box->width);
		sbox.height = util_format_get_nblocksy(src->format, src_box->height);
		sbox.depth = src_box->depth;
		src_box = &sbox;

		src_force_level = src_level;
	} else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) ||
		   /* also *8_SNORM has precision issues, use UNORM instead */
		   util_format_is_snorm8(src->format)) {
		if (util_format_is_subsampled_422(src->format)) {
			/* Treat a 4:2:2 pair of pixels as one RGBA8 uint texel. */
			src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
			dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;

			dst_width = util_format_get_nblocksx(dst->format, dst_width);
			src_width0 = util_format_get_nblocksx(src->format, src_width0);

			dstx = util_format_get_nblocksx(dst->format, dstx);

			sbox = *src_box;
			sbox.x = util_format_get_nblocksx(src->format, src_box->x);
			sbox.width = util_format_get_nblocksx(src->format, src_box->width);
			src_box = &sbox;
		} else {
			unsigned blocksize = rsrc->surface.bpe;

			/* Pick a blittable format with the same bytes per texel. */
			switch (blocksize) {
			case 1:
				dst_templ.format = PIPE_FORMAT_R8_UNORM;
				src_templ.format = PIPE_FORMAT_R8_UNORM;
				break;
			case 2:
				dst_templ.format = PIPE_FORMAT_R8G8_UNORM;
				src_templ.format = PIPE_FORMAT_R8G8_UNORM;
				break;
			case 4:
				dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
				src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
				break;
			case 8:
				dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
				src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
				break;
			case 16:
				dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
				src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
				break;
			default:
				fprintf(stderr, "Unhandled format %s with blocksize %u\n",
util_format_short_name(src->format), blocksize); 942 assert(0); 943 } 944 } 945 } 946 947 /* Initialize the surface. */ 948 dst_view = r600_create_surface_custom(ctx, dst, &dst_templ, 949 dst_width, dst_height); 950 951 /* Initialize the sampler view. */ 952 src_view = si_create_sampler_view_custom(ctx, src, &src_templ, 953 src_width0, src_height0, 954 src_force_level); 955 956 u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height), 957 abs(src_box->depth), &dstbox); 958 959 /* Copy. */ 960 si_blitter_begin(ctx, SI_COPY); 961 util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox, 962 src_view, src_box, src_width0, src_height0, 963 PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, 964 false); 965 si_blitter_end(ctx); 966 967 pipe_surface_reference(&dst_view, NULL); 968 pipe_sampler_view_reference(&src_view, NULL); 969 } 970 971 static bool do_hardware_msaa_resolve(struct pipe_context *ctx, 972 const struct pipe_blit_info *info) 973 { 974 struct si_context *sctx = (struct si_context*)ctx; 975 struct r600_texture *src = (struct r600_texture*)info->src.resource; 976 struct r600_texture *dst = (struct r600_texture*)info->dst.resource; 977 MAYBE_UNUSED struct r600_texture *rtmp; 978 unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); 979 unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); 980 enum pipe_format format = info->src.format; 981 unsigned sample_mask = ~0; 982 struct pipe_resource *tmp, templ; 983 struct pipe_blit_info blit; 984 985 /* Check basic requirements for hw resolve. */ 986 if (!(info->src.resource->nr_samples > 1 && 987 info->dst.resource->nr_samples <= 1 && 988 !util_format_is_pure_integer(format) && 989 !util_format_is_depth_or_stencil(format) && 990 util_max_layer(info->src.resource, 0) == 0)) 991 return false; 992 993 /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and 994 * the format is R16G16. Use R16A16, which does work. 
*/
	if (format == PIPE_FORMAT_R16G16_UNORM)
		format = PIPE_FORMAT_R16A16_UNORM;
	if (format == PIPE_FORMAT_R16G16_SNORM)
		format = PIPE_FORMAT_R16A16_SNORM;

	/* Check the remaining requirements for hw resolve: a full-surface,
	 * unscissored, whole-RGBA blit between compatible formats, with a
	 * tiled destination that has no pending fast-clear. */
	if (util_max_layer(info->dst.resource, info->dst.level) == 0 &&
	    !info->scissor_enable &&
	    (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA &&
	    util_is_format_compatible(util_format_description(info->src.format),
				      util_format_description(info->dst.format)) &&
	    dst_width == info->src.resource->width0 &&
	    dst_height == info->src.resource->height0 &&
	    info->dst.box.x == 0 &&
	    info->dst.box.y == 0 &&
	    info->dst.box.width == dst_width &&
	    info->dst.box.height == dst_height &&
	    info->dst.box.depth == 1 &&
	    info->src.box.x == 0 &&
	    info->src.box.y == 0 &&
	    info->src.box.width == dst_width &&
	    info->src.box.height == dst_height &&
	    info->src.box.depth == 1 &&
	    !dst->surface.is_linear &&
	    (!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
		/* Check the last constraint: matching micro-tile modes. */
		if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
			/* The next fast clear will switch to this mode to
			 * get direct hw resolve next time if the mode is
			 * different now.
			 */
			src->last_msaa_resolve_target_micro_mode =
				dst->surface.micro_tile_mode;
			goto resolve_to_temp;
		}

		/* Resolving into a surface with DCC is unsupported. Since
		 * it's being overwritten anyway, clear it to uncompressed.
		 * This is still the fastest codepath even with this clear.
		 */
		if (dst->dcc_offset &&
		    info->dst.level < dst->surface.num_dcc_levels) {
			vi_dcc_clear_level(&sctx->b, dst, info->dst.level,
					   0xFFFFFFFF);
			dst->dirty_level_mask &= ~(1 << info->dst.level);
		}

		/* Resolve directly from src to dst. */
		si_blitter_begin(ctx, SI_COLOR_RESOLVE |
				 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
		util_blitter_custom_resolve_color(sctx->blitter,
						  info->dst.resource, info->dst.level,
						  info->dst.box.z,
						  info->src.resource, info->src.box.z,
						  sample_mask, sctx->custom_blend_resolve,
						  format);
		si_blitter_end(ctx);
		return true;
	}

resolve_to_temp:
	/* Shader-based resolve is VERY SLOW. Instead, resolve into
	 * a temporary texture and blit.
	 */
	memset(&templ, 0, sizeof(templ));
	templ.target = PIPE_TEXTURE_2D;
	templ.format = info->src.resource->format;
	templ.width0 = info->src.resource->width0;
	templ.height0 = info->src.resource->height0;
	templ.depth0 = 1;
	templ.array_size = 1;
	templ.usage = PIPE_USAGE_DEFAULT;
	/* Force tiling and disable DCC so the temporary can be resolved
	 * into directly. */
	templ.flags = R600_RESOURCE_FLAG_FORCE_TILING |
		      R600_RESOURCE_FLAG_DISABLE_DCC;

	/* The src and dst microtile modes must be the same. */
	if (src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
		templ.bind = PIPE_BIND_SCANOUT;
	else
		templ.bind = 0;

	tmp = ctx->screen->resource_create(ctx->screen, &templ);
	if (!tmp)
		return false;
	rtmp = (struct r600_texture*)tmp;

	assert(!rtmp->surface.is_linear);
	assert(src->surface.micro_tile_mode == rtmp->surface.micro_tile_mode);

	/* resolve into the temporary */
	si_blitter_begin(ctx, SI_COLOR_RESOLVE |
			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
	util_blitter_custom_resolve_color(sctx->blitter, tmp, 0, 0,
					  info->src.resource, info->src.box.z,
					  sample_mask, sctx->custom_blend_resolve,
					  format);
	si_blitter_end(ctx);

	/* blit the temporary into the real destination */
	blit = *info;
	blit.src.resource = tmp;
	blit.src.box.z = 0;

	si_blitter_begin(ctx, SI_BLIT |
			 (info->render_condition_enable ?
0 : SI_DISABLE_RENDER_COND)); 1101 util_blitter_blit(sctx->blitter, &blit); 1102 si_blitter_end(ctx); 1103 1104 pipe_resource_reference(&tmp, NULL); 1105 return true; 1106 } 1107 1108 static void si_blit(struct pipe_context *ctx, 1109 const struct pipe_blit_info *info) 1110 { 1111 struct si_context *sctx = (struct si_context*)ctx; 1112 struct r600_texture *rdst = (struct r600_texture *)info->dst.resource; 1113 1114 if (do_hardware_msaa_resolve(ctx, info)) { 1115 return; 1116 } 1117 1118 /* Using SDMA for copying to a linear texture in GTT is much faster. 1119 * This improves DRI PRIME performance. 1120 * 1121 * resource_copy_region can't do this yet, because dma_copy calls it 1122 * on failure (recursion). 1123 */ 1124 if (rdst->surface.is_linear && 1125 sctx->b.dma_copy && 1126 util_can_blit_via_copy_region(info, false)) { 1127 sctx->b.dma_copy(ctx, info->dst.resource, info->dst.level, 1128 info->dst.box.x, info->dst.box.y, 1129 info->dst.box.z, 1130 info->src.resource, info->src.level, 1131 &info->src.box); 1132 return; 1133 } 1134 1135 assert(util_blitter_is_blit_supported(sctx->blitter, info)); 1136 1137 /* The driver doesn't decompress resources automatically while 1138 * u_blitter is rendering. */ 1139 vi_dcc_disable_if_incompatible_format(&sctx->b, info->src.resource, 1140 info->src.level, 1141 info->src.format); 1142 vi_dcc_disable_if_incompatible_format(&sctx->b, info->dst.resource, 1143 info->dst.level, 1144 info->dst.format); 1145 si_decompress_subresource(ctx, info->src.resource, info->mask, 1146 info->src.level, 1147 info->src.box.z, 1148 info->src.box.z + info->src.box.depth - 1); 1149 1150 if (sctx->screen->b.debug_flags & DBG_FORCE_DMA && 1151 util_try_blit_via_copy_region(ctx, info)) 1152 return; 1153 1154 si_blitter_begin(ctx, SI_BLIT | 1155 (info->render_condition_enable ? 
0 : SI_DISABLE_RENDER_COND)); 1156 util_blitter_blit(sctx->blitter, info); 1157 si_blitter_end(ctx); 1158 } 1159 1160 static boolean si_generate_mipmap(struct pipe_context *ctx, 1161 struct pipe_resource *tex, 1162 enum pipe_format format, 1163 unsigned base_level, unsigned last_level, 1164 unsigned first_layer, unsigned last_layer) 1165 { 1166 struct si_context *sctx = (struct si_context*)ctx; 1167 struct r600_texture *rtex = (struct r600_texture *)tex; 1168 1169 if (!util_blitter_is_copy_supported(sctx->blitter, tex, tex)) 1170 return false; 1171 1172 /* The driver doesn't decompress resources automatically while 1173 * u_blitter is rendering. */ 1174 vi_dcc_disable_if_incompatible_format(&sctx->b, tex, base_level, 1175 format); 1176 si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS, 1177 base_level, first_layer, last_layer); 1178 1179 /* Clear dirty_level_mask for the levels that will be overwritten. */ 1180 assert(base_level < last_level); 1181 rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1, 1182 last_level - base_level); 1183 1184 si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND); 1185 util_blitter_generate_mipmap(sctx->blitter, tex, format, 1186 base_level, last_level, 1187 first_layer, last_layer); 1188 si_blitter_end(ctx); 1189 return true; 1190 } 1191 1192 static void si_flush_resource(struct pipe_context *ctx, 1193 struct pipe_resource *res) 1194 { 1195 struct r600_texture *rtex = (struct r600_texture*)res; 1196 1197 assert(res->target != PIPE_BUFFER); 1198 assert(!rtex->dcc_separate_buffer || rtex->dcc_gather_statistics); 1199 1200 /* st/dri calls flush twice per frame (not a bug), this prevents double 1201 * decompression. 
*/ 1202 if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty) 1203 return; 1204 1205 if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) { 1206 si_blit_decompress_color(ctx, rtex, 0, res->last_level, 1207 0, util_max_layer(res, 0), 1208 rtex->dcc_separate_buffer != NULL); 1209 } 1210 1211 /* Always do the analysis even if DCC is disabled at the moment. */ 1212 if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) { 1213 rtex->separate_dcc_dirty = false; 1214 vi_separate_dcc_process_and_reset_stats(ctx, rtex); 1215 } 1216 } 1217 1218 static void si_decompress_dcc(struct pipe_context *ctx, 1219 struct r600_texture *rtex) 1220 { 1221 if (!rtex->dcc_offset) 1222 return; 1223 1224 si_blit_decompress_color(ctx, rtex, 0, rtex->resource.b.b.last_level, 1225 0, util_max_layer(&rtex->resource.b.b, 0), 1226 true); 1227 } 1228 1229 static void si_pipe_clear_buffer(struct pipe_context *ctx, 1230 struct pipe_resource *dst, 1231 unsigned offset, unsigned size, 1232 const void *clear_value_ptr, 1233 int clear_value_size) 1234 { 1235 struct si_context *sctx = (struct si_context*)ctx; 1236 uint32_t dword_value; 1237 unsigned i; 1238 1239 assert(offset % clear_value_size == 0); 1240 assert(size % clear_value_size == 0); 1241 1242 if (clear_value_size > 4) { 1243 const uint32_t *u32 = clear_value_ptr; 1244 bool clear_dword_duplicated = true; 1245 1246 /* See if we can lower large fills to dword fills. */ 1247 for (i = 1; i < clear_value_size / 4; i++) 1248 if (u32[0] != u32[i]) { 1249 clear_dword_duplicated = false; 1250 break; 1251 } 1252 1253 if (!clear_dword_duplicated) { 1254 /* Use transform feedback for 64-bit, 96-bit, and 1255 * 128-bit fills. 
1256 */ 1257 union pipe_color_union clear_value; 1258 1259 memcpy(&clear_value, clear_value_ptr, clear_value_size); 1260 si_blitter_begin(ctx, SI_DISABLE_RENDER_COND); 1261 util_blitter_clear_buffer(sctx->blitter, dst, offset, 1262 size, clear_value_size / 4, 1263 &clear_value); 1264 si_blitter_end(ctx); 1265 return; 1266 } 1267 } 1268 1269 /* Expand the clear value to a dword. */ 1270 switch (clear_value_size) { 1271 case 1: 1272 dword_value = *(uint8_t*)clear_value_ptr; 1273 dword_value |= (dword_value << 8) | 1274 (dword_value << 16) | 1275 (dword_value << 24); 1276 break; 1277 case 2: 1278 dword_value = *(uint16_t*)clear_value_ptr; 1279 dword_value |= dword_value << 16; 1280 break; 1281 default: 1282 dword_value = *(uint32_t*)clear_value_ptr; 1283 } 1284 1285 sctx->b.clear_buffer(ctx, dst, offset, size, dword_value, 1286 R600_COHERENCY_SHADER); 1287 } 1288 1289 void si_init_blit_functions(struct si_context *sctx) 1290 { 1291 sctx->b.b.clear = si_clear; 1292 sctx->b.b.clear_buffer = si_pipe_clear_buffer; 1293 sctx->b.b.clear_render_target = si_clear_render_target; 1294 sctx->b.b.clear_depth_stencil = si_clear_depth_stencil; 1295 sctx->b.b.resource_copy_region = si_resource_copy_region; 1296 sctx->b.b.blit = si_blit; 1297 sctx->b.b.flush_resource = si_flush_resource; 1298 sctx->b.b.generate_mipmap = si_generate_mipmap; 1299 sctx->b.blit_decompress_depth = si_blit_decompress_depth; 1300 sctx->b.decompress_dcc = si_decompress_dcc; 1301 } 1302