Home | History | Annotate | Download | only in radeonsi
      1 /*
      2  * Copyright 2010 Jerome Glisse <glisse (at) freedesktop.org>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "si_pipe.h"
     25 #include "si_compute.h"
     26 #include "util/u_format.h"
     27 #include "util/u_log.h"
     28 #include "util/u_surface.h"
     29 
     30 enum {
     31 	SI_COPY          = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
     32 			   SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,
     33 
     34 	SI_BLIT          = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
     35 			   SI_SAVE_FRAGMENT_STATE,
     36 
     37 	SI_DECOMPRESS    = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE |
     38 			   SI_DISABLE_RENDER_COND,
     39 
     40 	SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE
     41 };
     42 
     43 void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
     44 {
     45 	struct si_context *sctx = (struct si_context *)ctx;
     46 
     47 	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
     48 	util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
     49 	util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
     50 	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
     51 	util_blitter_save_so_targets(sctx->blitter, sctx->streamout.num_targets,
     52 				     (struct pipe_stream_output_target**)sctx->streamout.targets);
     53 	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
     54 
     55 	if (op & SI_SAVE_FRAGMENT_STATE) {
     56 		util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
     57 		util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
     58 		util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
     59 		util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
     60 		util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
     61 		util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
     62 	}
     63 
     64 	if (op & SI_SAVE_FRAMEBUFFER)
     65 		util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);
     66 
     67 	if (op & SI_SAVE_TEXTURES) {
     68 		util_blitter_save_fragment_sampler_states(
     69 			sctx->blitter, 2,
     70 			(void**)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states);
     71 
     72 		util_blitter_save_fragment_sampler_views(sctx->blitter, 2,
     73 			sctx->samplers[PIPE_SHADER_FRAGMENT].views);
     74 	}
     75 
     76 	if (op & SI_DISABLE_RENDER_COND)
     77 		sctx->b.render_cond_force_off = true;
     78 }
     79 
     80 void si_blitter_end(struct pipe_context *ctx)
     81 {
     82 	struct si_context *sctx = (struct si_context *)ctx;
     83 
     84 	sctx->b.render_cond_force_off = false;
     85 
     86 	/* Restore shader pointers because the VS blit shader changed all
     87 	 * non-global VS user SGPRs. */
     88 	sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
     89 	sctx->vertex_buffer_pointer_dirty = true;
     90 	si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
     91 }
     92 
     93 static unsigned u_max_sample(struct pipe_resource *r)
     94 {
     95 	return r->nr_samples ? r->nr_samples - 1 : 0;
     96 }
     97 
     98 static unsigned
     99 si_blit_dbcb_copy(struct si_context *sctx,
    100 		  struct r600_texture *src,
    101 		  struct r600_texture *dst,
    102 		  unsigned planes, unsigned level_mask,
    103 		  unsigned first_layer, unsigned last_layer,
    104 		  unsigned first_sample, unsigned last_sample)
    105 {
    106 	struct pipe_surface surf_tmpl = {{0}};
    107 	unsigned layer, sample, checked_last_layer, max_layer;
    108 	unsigned fully_copied_levels = 0;
    109 
    110 	if (planes & PIPE_MASK_Z)
    111 		sctx->dbcb_depth_copy_enabled = true;
    112 	if (planes & PIPE_MASK_S)
    113 		sctx->dbcb_stencil_copy_enabled = true;
    114 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
    115 
    116 	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
    117 
    118 	sctx->decompression_enabled = true;
    119 
    120 	while (level_mask) {
    121 		unsigned level = u_bit_scan(&level_mask);
    122 
    123 		/* The smaller the mipmap level, the less layers there are
    124 		 * as far as 3D textures are concerned. */
    125 		max_layer = util_max_layer(&src->resource.b.b, level);
    126 		checked_last_layer = MIN2(last_layer, max_layer);
    127 
    128 		surf_tmpl.u.tex.level = level;
    129 
    130 		for (layer = first_layer; layer <= checked_last_layer; layer++) {
    131 			struct pipe_surface *zsurf, *cbsurf;
    132 
    133 			surf_tmpl.format = src->resource.b.b.format;
    134 			surf_tmpl.u.tex.first_layer = layer;
    135 			surf_tmpl.u.tex.last_layer = layer;
    136 
    137 			zsurf = sctx->b.b.create_surface(&sctx->b.b, &src->resource.b.b, &surf_tmpl);
    138 
    139 			surf_tmpl.format = dst->resource.b.b.format;
    140 			cbsurf = sctx->b.b.create_surface(&sctx->b.b, &dst->resource.b.b, &surf_tmpl);
    141 
    142 			for (sample = first_sample; sample <= last_sample; sample++) {
    143 				if (sample != sctx->dbcb_copy_sample) {
    144 					sctx->dbcb_copy_sample = sample;
    145 					si_mark_atom_dirty(sctx, &sctx->db_render_state);
    146 				}
    147 
    148 				si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
    149 				util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
    150 								  sctx->custom_dsa_flush, 1.0f);
    151 				si_blitter_end(&sctx->b.b);
    152 			}
    153 
    154 			pipe_surface_reference(&zsurf, NULL);
    155 			pipe_surface_reference(&cbsurf, NULL);
    156 		}
    157 
    158 		if (first_layer == 0 && last_layer >= max_layer &&
    159 		    first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
    160 			fully_copied_levels |= 1u << level;
    161 	}
    162 
    163 	sctx->decompression_enabled = false;
    164 	sctx->dbcb_depth_copy_enabled = false;
    165 	sctx->dbcb_stencil_copy_enabled = false;
    166 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
    167 
    168 	return fully_copied_levels;
    169 }
    170 
    171 static void si_blit_decompress_depth(struct pipe_context *ctx,
    172 				     struct r600_texture *texture,
    173 				     struct r600_texture *staging,
    174 				     unsigned first_level, unsigned last_level,
    175 				     unsigned first_layer, unsigned last_layer,
    176 				     unsigned first_sample, unsigned last_sample)
    177 {
    178 	const struct util_format_description *desc;
    179 	unsigned planes = 0;
    180 
    181 	assert(staging != NULL && "use si_blit_decompress_zs_in_place instead");
    182 
    183 	desc = util_format_description(staging->resource.b.b.format);
    184 
    185 	if (util_format_has_depth(desc))
    186 		planes |= PIPE_MASK_Z;
    187 	if (util_format_has_stencil(desc))
    188 		planes |= PIPE_MASK_S;
    189 
    190 	si_blit_dbcb_copy(
    191 		(struct si_context *)ctx, texture, staging, planes,
    192 		u_bit_consecutive(first_level, last_level - first_level + 1),
    193 		first_layer, last_layer, first_sample, last_sample);
    194 }
    195 
    196 /* Helper function for si_blit_decompress_zs_in_place.
    197  */
    198 static void
    199 si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
    200 				      struct r600_texture *texture,
    201 				      unsigned planes, unsigned level_mask,
    202 				      unsigned first_layer, unsigned last_layer)
    203 {
    204 	struct pipe_surface *zsurf, surf_tmpl = {{0}};
    205 	unsigned layer, max_layer, checked_last_layer;
    206 	unsigned fully_decompressed_mask = 0;
    207 
    208 	if (!level_mask)
    209 		return;
    210 
    211 	if (planes & PIPE_MASK_S)
    212 		sctx->db_flush_stencil_inplace = true;
    213 	if (planes & PIPE_MASK_Z)
    214 		sctx->db_flush_depth_inplace = true;
    215 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
    216 
    217 	surf_tmpl.format = texture->resource.b.b.format;
    218 
    219 	sctx->decompression_enabled = true;
    220 
    221 	while (level_mask) {
    222 		unsigned level = u_bit_scan(&level_mask);
    223 
    224 		surf_tmpl.u.tex.level = level;
    225 
    226 		/* The smaller the mipmap level, the less layers there are
    227 		 * as far as 3D textures are concerned. */
    228 		max_layer = util_max_layer(&texture->resource.b.b, level);
    229 		checked_last_layer = MIN2(last_layer, max_layer);
    230 
    231 		for (layer = first_layer; layer <= checked_last_layer; layer++) {
    232 			surf_tmpl.u.tex.first_layer = layer;
    233 			surf_tmpl.u.tex.last_layer = layer;
    234 
    235 			zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b, &surf_tmpl);
    236 
    237 			si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
    238 			util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
    239 							  sctx->custom_dsa_flush,
    240 							  1.0f);
    241 			si_blitter_end(&sctx->b.b);
    242 
    243 			pipe_surface_reference(&zsurf, NULL);
    244 		}
    245 
    246 		/* The texture will always be dirty if some layers aren't flushed.
    247 		 * I don't think this case occurs often though. */
    248 		if (first_layer == 0 && last_layer >= max_layer) {
    249 			fully_decompressed_mask |= 1u << level;
    250 		}
    251 	}
    252 
    253 	if (planes & PIPE_MASK_Z)
    254 		texture->dirty_level_mask &= ~fully_decompressed_mask;
    255 	if (planes & PIPE_MASK_S)
    256 		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
    257 
    258 	sctx->decompression_enabled = false;
    259 	sctx->db_flush_depth_inplace = false;
    260 	sctx->db_flush_stencil_inplace = false;
    261 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
    262 }
    263 
    264 /* Helper function of si_flush_depth_texture: decompress the given levels
    265  * of Z and/or S planes in place.
    266  */
    267 static void
    268 si_blit_decompress_zs_in_place(struct si_context *sctx,
    269 			       struct r600_texture *texture,
    270 			       unsigned levels_z, unsigned levels_s,
    271 			       unsigned first_layer, unsigned last_layer)
    272 {
    273 	unsigned both = levels_z & levels_s;
    274 
    275 	/* First, do combined Z & S decompresses for levels that need it. */
    276 	if (both) {
    277 		si_blit_decompress_zs_planes_in_place(
    278 				sctx, texture, PIPE_MASK_Z | PIPE_MASK_S,
    279 				both,
    280 				first_layer, last_layer);
    281 		levels_z &= ~both;
    282 		levels_s &= ~both;
    283 	}
    284 
    285 	/* Now do separate Z and S decompresses. */
    286 	if (levels_z) {
    287 		si_blit_decompress_zs_planes_in_place(
    288 				sctx, texture, PIPE_MASK_Z,
    289 				levels_z,
    290 				first_layer, last_layer);
    291 	}
    292 
    293 	if (levels_s) {
    294 		si_blit_decompress_zs_planes_in_place(
    295 				sctx, texture, PIPE_MASK_S,
    296 				levels_s,
    297 				first_layer, last_layer);
    298 	}
    299 }
    300 
/* Make the requested planes of a depth/stencil texture readable for the
 * given level and layer range.
 *
 * required_planes selects PIPE_MASK_Z and/or PIPE_MASK_S. For each requested
 * plane only the levels that are still marked dirty in the texture's
 * (stencil_)dirty_level_mask are processed. A plane is handled in place when
 * si_can_sample_zs() accepts it, and otherwise by a copy into
 * tex->flushed_depth_texture, which is allocated here if it does not exist.
 */
static void
si_decompress_depth(struct si_context *sctx,
		    struct r600_texture *tex,
		    unsigned required_planes,
		    unsigned first_level, unsigned last_level,
		    unsigned first_layer, unsigned last_layer)
{
	unsigned inplace_planes = 0;
	unsigned copy_planes = 0;
	unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1);
	unsigned levels_z = 0;
	unsigned levels_s = 0;

	/* Sort each requested plane with dirty levels into either the
	 * in-place set or the copy set. */
	if (required_planes & PIPE_MASK_Z) {
		levels_z = level_mask & tex->dirty_level_mask;

		if (levels_z) {
			if (si_can_sample_zs(tex, false))
				inplace_planes |= PIPE_MASK_Z;
			else
				copy_planes |= PIPE_MASK_Z;
		}
	}
	if (required_planes & PIPE_MASK_S) {
		levels_s = level_mask & tex->stencil_dirty_level_mask;

		if (levels_s) {
			if (si_can_sample_zs(tex, true))
				inplace_planes |= PIPE_MASK_S;
			else
				copy_planes |= PIPE_MASK_S;
		}
	}

	if (unlikely(sctx->b.log))
		u_log_printf(sctx->b.log,
			     "\n------------------------------------------------\n"
			     "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n",
			     first_level, last_level, levels_z, levels_s);

	/* We may have to allocate the flushed texture here when called from
	 * si_decompress_subresource.
	 */
	if (copy_planes &&
	    (tex->flushed_depth_texture ||
	     si_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
		struct r600_texture *dst = tex->flushed_depth_texture;
		unsigned fully_copied_levels;
		unsigned levels = 0;

		assert(tex->flushed_depth_texture);

		/* A combined depth+stencil destination always gets both
		 * planes copied, whatever was selected above. */
		if (util_format_is_depth_and_stencil(dst->resource.b.b.format))
			copy_planes = PIPE_MASK_Z | PIPE_MASK_S;

		/* Levels taken over by the copy are zeroed in levels_z /
		 * levels_s, so the in-place path below sees none of them. */
		if (copy_planes & PIPE_MASK_Z) {
			levels |= levels_z;
			levels_z = 0;
		}
		if (copy_planes & PIPE_MASK_S) {
			levels |= levels_s;
			levels_s = 0;
		}

		/* All samples (0 .. max sample) are copied. */
		fully_copied_levels = si_blit_dbcb_copy(
			sctx, tex, dst, copy_planes, levels,
			first_layer, last_layer,
			0, u_max_sample(&tex->resource.b.b));

		if (copy_planes & PIPE_MASK_Z)
			tex->dirty_level_mask &= ~fully_copied_levels;
		if (copy_planes & PIPE_MASK_S)
			tex->stencil_dirty_level_mask &= ~fully_copied_levels;
	}

	if (inplace_planes) {
		/* Both HTILE queries look at first_level only. */
		bool has_htile = si_htile_enabled(tex, first_level);
		bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level);

		/* Don't decompress if there is no HTILE or when HTILE is
		 * TC-compatible. */
		if (has_htile && !tc_compat_htile) {
			si_blit_decompress_zs_in_place(
						sctx, tex,
						levels_z, levels_s,
						first_layer, last_layer);
		} else {
			/* This is only a cache flush.
			 *
			 * Only clear the mask that we are flushing, because
			 * si_make_DB_shader_coherent() treats different levels
			 * and depth and stencil differently.
			 */
			if (inplace_planes & PIPE_MASK_Z)
				tex->dirty_level_mask &= ~levels_z;
			if (inplace_planes & PIPE_MASK_S)
				tex->stencil_dirty_level_mask &= ~levels_s;
		}

		/* Only in-place decompression needs to flush DB caches, or
		 * when we don't decompress but TC-compatible planes are dirty.
		 */
		si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
					   inplace_planes & PIPE_MASK_S,
					   tc_compat_htile);
	}
	/* set_framebuffer_state takes care of coherency for single-sample.
	 * The DB->CB copy uses CB for the final writes.
	 */
	if (copy_planes && tex->resource.b.b.nr_samples > 1)
		si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
					   false);
}
    414 
    415 static void
    416 si_decompress_sampler_depth_textures(struct si_context *sctx,
    417 				     struct si_samplers *textures)
    418 {
    419 	unsigned i;
    420 	unsigned mask = textures->needs_depth_decompress_mask;
    421 
    422 	while (mask) {
    423 		struct pipe_sampler_view *view;
    424 		struct si_sampler_view *sview;
    425 		struct r600_texture *tex;
    426 
    427 		i = u_bit_scan(&mask);
    428 
    429 		view = textures->views[i];
    430 		assert(view);
    431 		sview = (struct si_sampler_view*)view;
    432 
    433 		tex = (struct r600_texture *)view->texture;
    434 		assert(tex->db_compatible);
    435 
    436 		si_decompress_depth(sctx, tex,
    437 				    sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
    438 				    view->u.tex.first_level, view->u.tex.last_level,
    439 				    0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
    440 	}
    441 }
    442 
    443 static void si_blit_decompress_color(struct pipe_context *ctx,
    444 		struct r600_texture *rtex,
    445 		unsigned first_level, unsigned last_level,
    446 		unsigned first_layer, unsigned last_layer,
    447 		bool need_dcc_decompress)
    448 {
    449 	struct si_context *sctx = (struct si_context *)ctx;
    450 	void* custom_blend;
    451 	unsigned layer, checked_last_layer, max_layer;
    452 	unsigned level_mask =
    453 		u_bit_consecutive(first_level, last_level - first_level + 1);
    454 
    455 	if (!need_dcc_decompress)
    456 		level_mask &= rtex->dirty_level_mask;
    457 	if (!level_mask)
    458 		return;
    459 
    460 	if (unlikely(sctx->b.log))
    461 		u_log_printf(sctx->b.log,
    462 			     "\n------------------------------------------------\n"
    463 			     "Decompress Color (levels %u - %u, mask 0x%x)\n\n",
    464 			     first_level, last_level, level_mask);
    465 
    466 	if (need_dcc_decompress) {
    467 		custom_blend = sctx->custom_blend_dcc_decompress;
    468 
    469 		assert(rtex->dcc_offset);
    470 
    471 		/* disable levels without DCC */
    472 		for (int i = first_level; i <= last_level; i++) {
    473 			if (!vi_dcc_enabled(rtex, i))
    474 				level_mask &= ~(1 << i);
    475 		}
    476 	} else if (rtex->fmask.size) {
    477 		custom_blend = sctx->custom_blend_fmask_decompress;
    478 	} else {
    479 		custom_blend = sctx->custom_blend_eliminate_fastclear;
    480 	}
    481 
    482 	sctx->decompression_enabled = true;
    483 
    484 	while (level_mask) {
    485 		unsigned level = u_bit_scan(&level_mask);
    486 
    487 		/* The smaller the mipmap level, the less layers there are
    488 		 * as far as 3D textures are concerned. */
    489 		max_layer = util_max_layer(&rtex->resource.b.b, level);
    490 		checked_last_layer = MIN2(last_layer, max_layer);
    491 
    492 		for (layer = first_layer; layer <= checked_last_layer; layer++) {
    493 			struct pipe_surface *cbsurf, surf_tmpl;
    494 
    495 			surf_tmpl.format = rtex->resource.b.b.format;
    496 			surf_tmpl.u.tex.level = level;
    497 			surf_tmpl.u.tex.first_layer = layer;
    498 			surf_tmpl.u.tex.last_layer = layer;
    499 			cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
    500 
    501 			/* Required before and after FMASK and DCC_DECOMPRESS. */
    502 			if (custom_blend == sctx->custom_blend_fmask_decompress ||
    503 			    custom_blend == sctx->custom_blend_dcc_decompress)
    504 				sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
    505 
    506 			si_blitter_begin(ctx, SI_DECOMPRESS);
    507 			util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
    508 			si_blitter_end(ctx);
    509 
    510 			if (custom_blend == sctx->custom_blend_fmask_decompress ||
    511 			    custom_blend == sctx->custom_blend_dcc_decompress)
    512 				sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
    513 
    514 			pipe_surface_reference(&cbsurf, NULL);
    515 		}
    516 
    517 		/* The texture will always be dirty if some layers aren't flushed.
    518 		 * I don't think this case occurs often though. */
    519 		if (first_layer == 0 && last_layer >= max_layer) {
    520 			rtex->dirty_level_mask &= ~(1 << level);
    521 		}
    522 	}
    523 
    524 	sctx->decompression_enabled = false;
    525 	si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples,
    526 				   vi_dcc_enabled(rtex, first_level));
    527 }
    528 
    529 static void
    530 si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
    531 			    unsigned first_level, unsigned last_level)
    532 {
    533 	/* CMASK or DCC can be discarded and we can still end up here. */
    534 	if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
    535 		return;
    536 
    537 	si_blit_decompress_color(&sctx->b.b, tex, first_level, last_level, 0,
    538 				 util_max_layer(&tex->resource.b.b, first_level),
    539 				 false);
    540 }
    541 
    542 static void
    543 si_decompress_sampler_color_textures(struct si_context *sctx,
    544 				     struct si_samplers *textures)
    545 {
    546 	unsigned i;
    547 	unsigned mask = textures->needs_color_decompress_mask;
    548 
    549 	while (mask) {
    550 		struct pipe_sampler_view *view;
    551 		struct r600_texture *tex;
    552 
    553 		i = u_bit_scan(&mask);
    554 
    555 		view = textures->views[i];
    556 		assert(view);
    557 
    558 		tex = (struct r600_texture *)view->texture;
    559 
    560 		si_decompress_color_texture(sctx, tex, view->u.tex.first_level,
    561 					    view->u.tex.last_level);
    562 	}
    563 }
    564 
    565 static void
    566 si_decompress_image_color_textures(struct si_context *sctx,
    567 				   struct si_images *images)
    568 {
    569 	unsigned i;
    570 	unsigned mask = images->needs_color_decompress_mask;
    571 
    572 	while (mask) {
    573 		const struct pipe_image_view *view;
    574 		struct r600_texture *tex;
    575 
    576 		i = u_bit_scan(&mask);
    577 
    578 		view = &images->views[i];
    579 		assert(view->resource->target != PIPE_BUFFER);
    580 
    581 		tex = (struct r600_texture *)view->resource;
    582 
    583 		si_decompress_color_texture(sctx, tex, view->u.tex.level,
    584 					    view->u.tex.level);
    585 	}
    586 }
    587 
    588 static void si_check_render_feedback_texture(struct si_context *sctx,
    589 					     struct r600_texture *tex,
    590 					     unsigned first_level,
    591 					     unsigned last_level,
    592 					     unsigned first_layer,
    593 					     unsigned last_layer)
    594 {
    595 	bool render_feedback = false;
    596 
    597 	if (!tex->dcc_offset)
    598 		return;
    599 
    600 	for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) {
    601 		struct r600_surface * surf;
    602 
    603 		if (!sctx->framebuffer.state.cbufs[j])
    604 			continue;
    605 
    606 		surf = (struct r600_surface*)sctx->framebuffer.state.cbufs[j];
    607 
    608 		if (tex == (struct r600_texture *)surf->base.texture &&
    609 		    surf->base.u.tex.level >= first_level &&
    610 		    surf->base.u.tex.level <= last_level &&
    611 		    surf->base.u.tex.first_layer <= last_layer &&
    612 		    surf->base.u.tex.last_layer >= first_layer) {
    613 			render_feedback = true;
    614 			break;
    615 		}
    616 	}
    617 
    618 	if (render_feedback)
    619 		si_texture_disable_dcc(&sctx->b, tex);
    620 }
    621 
    622 static void si_check_render_feedback_textures(struct si_context *sctx,
    623                                               struct si_samplers *textures)
    624 {
    625 	uint32_t mask = textures->enabled_mask;
    626 
    627 	while (mask) {
    628 		const struct pipe_sampler_view *view;
    629 		struct r600_texture *tex;
    630 
    631 		unsigned i = u_bit_scan(&mask);
    632 
    633 		view = textures->views[i];
    634 		if(view->texture->target == PIPE_BUFFER)
    635 			continue;
    636 
    637 		tex = (struct r600_texture *)view->texture;
    638 
    639 		si_check_render_feedback_texture(sctx, tex,
    640 						 view->u.tex.first_level,
    641 						 view->u.tex.last_level,
    642 						 view->u.tex.first_layer,
    643 						 view->u.tex.last_layer);
    644 	}
    645 }
    646 
    647 static void si_check_render_feedback_images(struct si_context *sctx,
    648                                             struct si_images *images)
    649 {
    650 	uint32_t mask = images->enabled_mask;
    651 
    652 	while (mask) {
    653 		const struct pipe_image_view *view;
    654 		struct r600_texture *tex;
    655 
    656 		unsigned i = u_bit_scan(&mask);
    657 
    658 		view = &images->views[i];
    659 		if (view->resource->target == PIPE_BUFFER)
    660 			continue;
    661 
    662 		tex = (struct r600_texture *)view->resource;
    663 
    664 		si_check_render_feedback_texture(sctx, tex,
    665 						 view->u.tex.level,
    666 						 view->u.tex.level,
    667 						 view->u.tex.first_layer,
    668 						 view->u.tex.last_layer);
    669 	}
    670 }
    671 
    672 static void si_check_render_feedback_resident_textures(struct si_context *sctx)
    673 {
    674 	util_dynarray_foreach(&sctx->resident_tex_handles,
    675 			      struct si_texture_handle *, tex_handle) {
    676 		struct pipe_sampler_view *view;
    677 		struct r600_texture *tex;
    678 
    679 		view = (*tex_handle)->view;
    680 		if (view->texture->target == PIPE_BUFFER)
    681 			continue;
    682 
    683 		tex = (struct r600_texture *)view->texture;
    684 
    685 		si_check_render_feedback_texture(sctx, tex,
    686 						 view->u.tex.first_level,
    687 						 view->u.tex.last_level,
    688 						 view->u.tex.first_layer,
    689 						 view->u.tex.last_layer);
    690 	}
    691 }
    692 
    693 static void si_check_render_feedback_resident_images(struct si_context *sctx)
    694 {
    695 	util_dynarray_foreach(&sctx->resident_img_handles,
    696 			      struct si_image_handle *, img_handle) {
    697 		struct pipe_image_view *view;
    698 		struct r600_texture *tex;
    699 
    700 		view = &(*img_handle)->view;
    701 		if (view->resource->target == PIPE_BUFFER)
    702 			continue;
    703 
    704 		tex = (struct r600_texture *)view->resource;
    705 
    706 		si_check_render_feedback_texture(sctx, tex,
    707 						 view->u.tex.level,
    708 						 view->u.tex.level,
    709 						 view->u.tex.first_layer,
    710 						 view->u.tex.last_layer);
    711 	}
    712 }
    713 
    714 static void si_check_render_feedback(struct si_context *sctx)
    715 {
    716 
    717 	if (!sctx->need_check_render_feedback)
    718 		return;
    719 
    720 	for (int i = 0; i < SI_NUM_SHADERS; ++i) {
    721 		si_check_render_feedback_images(sctx, &sctx->images[i]);
    722 		si_check_render_feedback_textures(sctx, &sctx->samplers[i]);
    723 	}
    724 
    725 	si_check_render_feedback_resident_images(sctx);
    726 	si_check_render_feedback_resident_textures(sctx);
    727 
    728 	sctx->need_check_render_feedback = false;
    729 }
    730 
    731 static void si_decompress_resident_textures(struct si_context *sctx)
    732 {
    733 	util_dynarray_foreach(&sctx->resident_tex_needs_color_decompress,
    734 			      struct si_texture_handle *, tex_handle) {
    735 		struct pipe_sampler_view *view = (*tex_handle)->view;
    736 		struct r600_texture *tex = (struct r600_texture *)view->texture;
    737 
    738 		si_decompress_color_texture(sctx, tex, view->u.tex.first_level,
    739 					    view->u.tex.last_level);
    740 	}
    741 
    742 	util_dynarray_foreach(&sctx->resident_tex_needs_depth_decompress,
    743 			      struct si_texture_handle *, tex_handle) {
    744 		struct pipe_sampler_view *view = (*tex_handle)->view;
    745 		struct si_sampler_view *sview = (struct si_sampler_view *)view;
    746 		struct r600_texture *tex = (struct r600_texture *)view->texture;
    747 
    748 		si_decompress_depth(sctx, tex,
    749 			sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
    750 			view->u.tex.first_level, view->u.tex.last_level,
    751 			0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
    752 	}
    753 }
    754 
    755 static void si_decompress_resident_images(struct si_context *sctx)
    756 {
    757 	util_dynarray_foreach(&sctx->resident_img_needs_color_decompress,
    758 			      struct si_image_handle *, img_handle) {
    759 		struct pipe_image_view *view = &(*img_handle)->view;
    760 		struct r600_texture *tex = (struct r600_texture *)view->resource;
    761 
    762 		si_decompress_color_texture(sctx, tex, view->u.tex.level,
    763 					    view->u.tex.level);
    764 	}
    765 }
    766 
    767 void si_decompress_textures(struct si_context *sctx, unsigned shader_mask)
    768 {
    769 	unsigned compressed_colortex_counter, mask;
    770 
    771 	if (sctx->blitter->running)
    772 		return;
    773 
    774 	/* Update the compressed_colortex_mask if necessary. */
    775 	compressed_colortex_counter = p_atomic_read(&sctx->screen->compressed_colortex_counter);
    776 	if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) {
    777 		sctx->b.last_compressed_colortex_counter = compressed_colortex_counter;
    778 		si_update_needs_color_decompress_masks(sctx);
    779 	}
    780 
    781 	/* Decompress color & depth textures if needed. */
    782 	mask = sctx->shader_needs_decompress_mask & shader_mask;
    783 	while (mask) {
    784 		unsigned i = u_bit_scan(&mask);
    785 
    786 		if (sctx->samplers[i].needs_depth_decompress_mask) {
    787 			si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]);
    788 		}
    789 		if (sctx->samplers[i].needs_color_decompress_mask) {
    790 			si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]);
    791 		}
    792 		if (sctx->images[i].needs_color_decompress_mask) {
    793 			si_decompress_image_color_textures(sctx, &sctx->images[i]);
    794 		}
    795 	}
    796 
    797 	if (shader_mask & u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)) {
    798 		if (sctx->uses_bindless_samplers)
    799 			si_decompress_resident_textures(sctx);
    800 		if (sctx->uses_bindless_images)
    801 			si_decompress_resident_images(sctx);
    802 	} else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) {
    803 		if (sctx->cs_shader_state.program->uses_bindless_samplers)
    804 			si_decompress_resident_textures(sctx);
    805 		if (sctx->cs_shader_state.program->uses_bindless_images)
    806 			si_decompress_resident_images(sctx);
    807 	}
    808 
    809 	si_check_render_feedback(sctx);
    810 }
    811 
    812 /* Helper for decompressing a portion of a color or depth resource before
    813  * blitting if any decompression is needed.
    814  * The driver doesn't decompress resources automatically while u_blitter is
    815  * rendering. */
    816 static void si_decompress_subresource(struct pipe_context *ctx,
    817 				      struct pipe_resource *tex,
    818 				      unsigned planes, unsigned level,
    819 				      unsigned first_layer, unsigned last_layer)
    820 {
    821 	struct si_context *sctx = (struct si_context *)ctx;
    822 	struct r600_texture *rtex = (struct r600_texture*)tex;
    823 
    824 	if (rtex->db_compatible) {
    825 		planes &= PIPE_MASK_Z | PIPE_MASK_S;
    826 
    827 		if (!rtex->surface.has_stencil)
    828 			planes &= ~PIPE_MASK_S;
    829 
    830 		/* If we've rendered into the framebuffer and it's a blitting
    831 		 * source, make sure the decompression pass is invoked
    832 		 * by dirtying the framebuffer.
    833 		 */
    834 		if (sctx->framebuffer.state.zsbuf &&
    835 		    sctx->framebuffer.state.zsbuf->u.tex.level == level &&
    836 		    sctx->framebuffer.state.zsbuf->texture == tex)
    837 			si_update_fb_dirtiness_after_rendering(sctx);
    838 
    839 		si_decompress_depth(sctx, rtex, planes,
    840 				    level, level,
    841 				    first_layer, last_layer);
    842 	} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
    843 		/* If we've rendered into the framebuffer and it's a blitting
    844 		 * source, make sure the decompression pass is invoked
    845 		 * by dirtying the framebuffer.
    846 		 */
    847 		for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
    848 			if (sctx->framebuffer.state.cbufs[i] &&
    849 			    sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
    850 			    sctx->framebuffer.state.cbufs[i]->texture == tex) {
    851 				si_update_fb_dirtiness_after_rendering(sctx);
    852 				break;
    853 			}
    854 		}
    855 
    856 		si_blit_decompress_color(ctx, rtex, level, level,
    857 					 first_layer, last_layer, false);
    858 	}
    859 }
    860 
    861 struct texture_orig_info {
    862 	unsigned format;
    863 	unsigned width0;
    864 	unsigned height0;
    865 	unsigned npix_x;
    866 	unsigned npix_y;
    867 	unsigned npix0_x;
    868 	unsigned npix0_y;
    869 };
    870 
    871 void si_resource_copy_region(struct pipe_context *ctx,
    872 			     struct pipe_resource *dst,
    873 			     unsigned dst_level,
    874 			     unsigned dstx, unsigned dsty, unsigned dstz,
    875 			     struct pipe_resource *src,
    876 			     unsigned src_level,
    877 			     const struct pipe_box *src_box)
    878 {
    879 	struct si_context *sctx = (struct si_context *)ctx;
    880 	struct r600_texture *rsrc = (struct r600_texture*)src;
    881 	struct pipe_surface *dst_view, dst_templ;
    882 	struct pipe_sampler_view src_templ, *src_view;
    883 	unsigned dst_width, dst_height, src_width0, src_height0;
    884 	unsigned dst_width0, dst_height0, src_force_level = 0;
    885 	struct pipe_box sbox, dstbox;
    886 
    887 	/* Handle buffers first. */
    888 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
    889 		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, 0);
    890 		return;
    891 	}
    892 
    893 	assert(u_max_sample(dst) == u_max_sample(src));
    894 
    895 	/* The driver doesn't decompress resources automatically while
    896 	 * u_blitter is rendering. */
    897 	si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level,
    898 				  src_box->z, src_box->z + src_box->depth - 1);
    899 
    900 	dst_width = u_minify(dst->width0, dst_level);
    901 	dst_height = u_minify(dst->height0, dst_level);
    902 	dst_width0 = dst->width0;
    903 	dst_height0 = dst->height0;
    904 	src_width0 = src->width0;
    905 	src_height0 = src->height0;
    906 
    907 	util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
    908 	util_blitter_default_src_texture(sctx->blitter, &src_templ, src, src_level);
    909 
    910 	if (util_format_is_compressed(src->format) ||
    911 	    util_format_is_compressed(dst->format)) {
    912 		unsigned blocksize = rsrc->surface.bpe;
    913 
    914 		if (blocksize == 8)
    915 			src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
    916 		else
    917 			src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
    918 		dst_templ.format = src_templ.format;
    919 
    920 		dst_width = util_format_get_nblocksx(dst->format, dst_width);
    921 		dst_height = util_format_get_nblocksy(dst->format, dst_height);
    922 		dst_width0 = util_format_get_nblocksx(dst->format, dst_width0);
    923 		dst_height0 = util_format_get_nblocksy(dst->format, dst_height0);
    924 		src_width0 = util_format_get_nblocksx(src->format, src_width0);
    925 		src_height0 = util_format_get_nblocksy(src->format, src_height0);
    926 
    927 		dstx = util_format_get_nblocksx(dst->format, dstx);
    928 		dsty = util_format_get_nblocksy(dst->format, dsty);
    929 
    930 		sbox.x = util_format_get_nblocksx(src->format, src_box->x);
    931 		sbox.y = util_format_get_nblocksy(src->format, src_box->y);
    932 		sbox.z = src_box->z;
    933 		sbox.width = util_format_get_nblocksx(src->format, src_box->width);
    934 		sbox.height = util_format_get_nblocksy(src->format, src_box->height);
    935 		sbox.depth = src_box->depth;
    936 		src_box = &sbox;
    937 
    938 		src_force_level = src_level;
    939 	} else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) {
    940 		if (util_format_is_subsampled_422(src->format)) {
    941 			src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
    942 			dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
    943 
    944 			dst_width = util_format_get_nblocksx(dst->format, dst_width);
    945 			dst_width0 = util_format_get_nblocksx(dst->format, dst_width0);
    946 			src_width0 = util_format_get_nblocksx(src->format, src_width0);
    947 
    948 			dstx = util_format_get_nblocksx(dst->format, dstx);
    949 
    950 			sbox = *src_box;
    951 			sbox.x = util_format_get_nblocksx(src->format, src_box->x);
    952 			sbox.width = util_format_get_nblocksx(src->format, src_box->width);
    953 			src_box = &sbox;
    954 		} else {
    955 			unsigned blocksize = rsrc->surface.bpe;
    956 
    957 			switch (blocksize) {
    958 			case 1:
    959 				dst_templ.format = PIPE_FORMAT_R8_UNORM;
    960 				src_templ.format = PIPE_FORMAT_R8_UNORM;
    961 				break;
    962 			case 2:
    963 				dst_templ.format = PIPE_FORMAT_R8G8_UNORM;
    964 				src_templ.format = PIPE_FORMAT_R8G8_UNORM;
    965 				break;
    966 			case 4:
    967 				dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
    968 				src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
    969 				break;
    970 			case 8:
    971 				dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
    972 				src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
    973 				break;
    974 			case 16:
    975 				dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
    976 				src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
    977 				break;
    978 			default:
    979 				fprintf(stderr, "Unhandled format %s with blocksize %u\n",
    980 					util_format_short_name(src->format), blocksize);
    981 				assert(0);
    982 			}
    983 		}
    984 	}
    985 
    986 	/* SNORM8 blitting has precision issues on some chips. Use the SINT
    987 	 * equivalent instead, which doesn't force DCC decompression.
    988 	 * Note that some chips avoid this issue by using SDMA.
    989 	 */
    990 	if (util_format_is_snorm8(dst_templ.format)) {
    991 		switch (dst_templ.format) {
    992 		case PIPE_FORMAT_R8_SNORM:
    993 			dst_templ.format = src_templ.format = PIPE_FORMAT_R8_SINT;
    994 			break;
    995 		case PIPE_FORMAT_R8G8_SNORM:
    996 			dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8_SINT;
    997 			break;
    998 		case PIPE_FORMAT_R8G8B8X8_SNORM:
    999 			dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8X8_SINT;
   1000 			break;
   1001 		case PIPE_FORMAT_R8G8B8A8_SNORM:
   1002 		/* There are no SINT variants for ABGR and XBGR, so we have to use RGBA. */
   1003 		case PIPE_FORMAT_A8B8G8R8_SNORM:
   1004 		case PIPE_FORMAT_X8B8G8R8_SNORM:
   1005 			dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8A8_SINT;
   1006 			break;
   1007 		case PIPE_FORMAT_A8_SNORM:
   1008 			dst_templ.format = src_templ.format = PIPE_FORMAT_A8_SINT;
   1009 			break;
   1010 		case PIPE_FORMAT_L8_SNORM:
   1011 			dst_templ.format = src_templ.format = PIPE_FORMAT_L8_SINT;
   1012 			break;
   1013 		case PIPE_FORMAT_L8A8_SNORM:
   1014 			dst_templ.format = src_templ.format = PIPE_FORMAT_L8A8_SINT;
   1015 			break;
   1016 		case PIPE_FORMAT_I8_SNORM:
   1017 			dst_templ.format = src_templ.format = PIPE_FORMAT_I8_SINT;
   1018 			break;
   1019 		default:; /* fall through */
   1020 		}
   1021 	}
   1022 
   1023 	vi_disable_dcc_if_incompatible_format(&sctx->b, dst, dst_level,
   1024 					      dst_templ.format);
   1025 	vi_disable_dcc_if_incompatible_format(&sctx->b, src, src_level,
   1026 					      src_templ.format);
   1027 
   1028 	/* Initialize the surface. */
   1029 	dst_view = si_create_surface_custom(ctx, dst, &dst_templ,
   1030 					      dst_width0, dst_height0,
   1031 					      dst_width, dst_height);
   1032 
   1033 	/* Initialize the sampler view. */
   1034 	src_view = si_create_sampler_view_custom(ctx, src, &src_templ,
   1035 						 src_width0, src_height0,
   1036 						 src_force_level);
   1037 
   1038 	u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height),
   1039 		 abs(src_box->depth), &dstbox);
   1040 
   1041 	/* Copy. */
   1042 	si_blitter_begin(ctx, SI_COPY);
   1043 	util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox,
   1044 				  src_view, src_box, src_width0, src_height0,
   1045 				  PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
   1046 				  false);
   1047 	si_blitter_end(ctx);
   1048 
   1049 	pipe_surface_reference(&dst_view, NULL);
   1050 	pipe_sampler_view_reference(&src_view, NULL);
   1051 }
   1052 
   1053 static void si_do_CB_resolve(struct si_context *sctx,
   1054 			     const struct pipe_blit_info *info,
   1055 			     struct pipe_resource *dst,
   1056 			     unsigned dst_level, unsigned dst_z,
   1057 			     enum pipe_format format)
   1058 {
   1059 	/* Required before and after CB_RESOLVE. */
   1060 	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
   1061 
   1062 	si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE |
   1063 			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
   1064 	util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z,
   1065 					  info->src.resource, info->src.box.z,
   1066 					  ~0, sctx->custom_blend_resolve,
   1067 					  format);
   1068 	si_blitter_end(&sctx->b.b);
   1069 
   1070 	/* Flush caches for possible texturing. */
   1071 	si_make_CB_shader_coherent(sctx, 1, false);
   1072 }
   1073 
   1074 static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
   1075 				     const struct pipe_blit_info *info)
   1076 {
   1077 	struct si_context *sctx = (struct si_context*)ctx;
   1078 	struct r600_texture *src = (struct r600_texture*)info->src.resource;
   1079 	struct r600_texture *dst = (struct r600_texture*)info->dst.resource;
   1080 	MAYBE_UNUSED struct r600_texture *rtmp;
   1081 	unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
   1082 	unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
   1083 	enum pipe_format format = info->src.format;
   1084 	struct pipe_resource *tmp, templ;
   1085 	struct pipe_blit_info blit;
   1086 
   1087 	/* Check basic requirements for hw resolve. */
   1088 	if (!(info->src.resource->nr_samples > 1 &&
   1089 	      info->dst.resource->nr_samples <= 1 &&
   1090 	      !util_format_is_pure_integer(format) &&
   1091 	      !util_format_is_depth_or_stencil(format) &&
   1092 	      util_max_layer(info->src.resource, 0) == 0))
   1093 		return false;
   1094 
   1095 	/* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
   1096 	 * the format is R16G16. Use R16A16, which does work.
   1097 	 */
   1098 	if (format == PIPE_FORMAT_R16G16_UNORM)
   1099 		format = PIPE_FORMAT_R16A16_UNORM;
   1100 	if (format == PIPE_FORMAT_R16G16_SNORM)
   1101 		format = PIPE_FORMAT_R16A16_SNORM;
   1102 
   1103 	/* Check the remaining requirements for hw resolve. */
   1104 	if (util_max_layer(info->dst.resource, info->dst.level) == 0 &&
   1105 	    !info->scissor_enable &&
   1106 	    (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA &&
   1107 	    util_is_format_compatible(util_format_description(info->src.format),
   1108 				      util_format_description(info->dst.format)) &&
   1109 	    dst_width == info->src.resource->width0 &&
   1110 	    dst_height == info->src.resource->height0 &&
   1111 	    info->dst.box.x == 0 &&
   1112 	    info->dst.box.y == 0 &&
   1113 	    info->dst.box.width == dst_width &&
   1114 	    info->dst.box.height == dst_height &&
   1115 	    info->dst.box.depth == 1 &&
   1116 	    info->src.box.x == 0 &&
   1117 	    info->src.box.y == 0 &&
   1118 	    info->src.box.width == dst_width &&
   1119 	    info->src.box.height == dst_height &&
   1120 	    info->src.box.depth == 1 &&
   1121 	    !dst->surface.is_linear &&
   1122 	    (!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
   1123 		/* Check the last constraint. */
   1124 		if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
   1125 			/* The next fast clear will switch to this mode to
   1126 			 * get direct hw resolve next time if the mode is
   1127 			 * different now.
   1128 			 */
   1129 			src->last_msaa_resolve_target_micro_mode =
   1130 				dst->surface.micro_tile_mode;
   1131 			goto resolve_to_temp;
   1132 		}
   1133 
   1134 		/* Resolving into a surface with DCC is unsupported. Since
   1135 		 * it's being overwritten anyway, clear it to uncompressed.
   1136 		 * This is still the fastest codepath even with this clear.
   1137 		 */
   1138 		if (vi_dcc_enabled(dst, info->dst.level)) {
   1139 			/* TODO: Implement per-level DCC clears for GFX9. */
   1140 			if (sctx->b.chip_class >= GFX9 &&
   1141 			    info->dst.resource->last_level != 0)
   1142 				goto resolve_to_temp;
   1143 
   1144 			vi_dcc_clear_level(sctx, dst, info->dst.level,
   1145 					   0xFFFFFFFF);
   1146 			dst->dirty_level_mask &= ~(1 << info->dst.level);
   1147 		}
   1148 
   1149 		/* Resolve directly from src to dst. */
   1150 		si_do_CB_resolve(sctx, info, info->dst.resource,
   1151 				 info->dst.level, info->dst.box.z, format);
   1152 		return true;
   1153 	}
   1154 
   1155 resolve_to_temp:
   1156 	/* Shader-based resolve is VERY SLOW. Instead, resolve into
   1157 	 * a temporary texture and blit.
   1158 	 */
   1159 	memset(&templ, 0, sizeof(templ));
   1160 	templ.target = PIPE_TEXTURE_2D;
   1161 	templ.format = info->src.resource->format;
   1162 	templ.width0 = info->src.resource->width0;
   1163 	templ.height0 = info->src.resource->height0;
   1164 	templ.depth0 = 1;
   1165 	templ.array_size = 1;
   1166 	templ.usage = PIPE_USAGE_DEFAULT;
   1167 	templ.flags = R600_RESOURCE_FLAG_FORCE_TILING |
   1168 		      R600_RESOURCE_FLAG_DISABLE_DCC;
   1169 
   1170 	/* The src and dst microtile modes must be the same. */
   1171 	if (src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
   1172 		templ.bind = PIPE_BIND_SCANOUT;
   1173 	else
   1174 		templ.bind = 0;
   1175 
   1176 	tmp = ctx->screen->resource_create(ctx->screen, &templ);
   1177 	if (!tmp)
   1178 		return false;
   1179 	rtmp = (struct r600_texture*)tmp;
   1180 
   1181 	assert(!rtmp->surface.is_linear);
   1182 	assert(src->surface.micro_tile_mode == rtmp->surface.micro_tile_mode);
   1183 
   1184 	/* resolve */
   1185 	si_do_CB_resolve(sctx, info, tmp, 0, 0, format);
   1186 
   1187 	/* blit */
   1188 	blit = *info;
   1189 	blit.src.resource = tmp;
   1190 	blit.src.box.z = 0;
   1191 
   1192 	si_blitter_begin(ctx, SI_BLIT |
   1193 			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
   1194 	util_blitter_blit(sctx->blitter, &blit);
   1195 	si_blitter_end(ctx);
   1196 
   1197 	pipe_resource_reference(&tmp, NULL);
   1198 	return true;
   1199 }
   1200 
   1201 static void si_blit(struct pipe_context *ctx,
   1202 		    const struct pipe_blit_info *info)
   1203 {
   1204 	struct si_context *sctx = (struct si_context*)ctx;
   1205 	struct r600_texture *rdst = (struct r600_texture *)info->dst.resource;
   1206 
   1207 	if (do_hardware_msaa_resolve(ctx, info)) {
   1208 		return;
   1209 	}
   1210 
   1211 	/* Using SDMA for copying to a linear texture in GTT is much faster.
   1212 	 * This improves DRI PRIME performance.
   1213 	 *
   1214 	 * resource_copy_region can't do this yet, because dma_copy calls it
   1215 	 * on failure (recursion).
   1216 	 */
   1217 	if (rdst->surface.is_linear &&
   1218 	    sctx->b.dma_copy &&
   1219 	    util_can_blit_via_copy_region(info, false)) {
   1220 		sctx->b.dma_copy(ctx, info->dst.resource, info->dst.level,
   1221 				 info->dst.box.x, info->dst.box.y,
   1222 				 info->dst.box.z,
   1223 				 info->src.resource, info->src.level,
   1224 				 &info->src.box);
   1225 		return;
   1226 	}
   1227 
   1228 	assert(util_blitter_is_blit_supported(sctx->blitter, info));
   1229 
   1230 	/* The driver doesn't decompress resources automatically while
   1231 	 * u_blitter is rendering. */
   1232 	vi_disable_dcc_if_incompatible_format(&sctx->b, info->src.resource,
   1233 					      info->src.level,
   1234 					      info->src.format);
   1235 	vi_disable_dcc_if_incompatible_format(&sctx->b, info->dst.resource,
   1236 					      info->dst.level,
   1237 					      info->dst.format);
   1238 	si_decompress_subresource(ctx, info->src.resource, info->mask,
   1239 				  info->src.level,
   1240 				  info->src.box.z,
   1241 				  info->src.box.z + info->src.box.depth - 1);
   1242 
   1243 	if (sctx->screen->debug_flags & DBG(FORCE_DMA) &&
   1244 	    util_try_blit_via_copy_region(ctx, info))
   1245 		return;
   1246 
   1247 	si_blitter_begin(ctx, SI_BLIT |
   1248 			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
   1249 	util_blitter_blit(sctx->blitter, info);
   1250 	si_blitter_end(ctx);
   1251 }
   1252 
   1253 static boolean si_generate_mipmap(struct pipe_context *ctx,
   1254 				  struct pipe_resource *tex,
   1255 				  enum pipe_format format,
   1256 				  unsigned base_level, unsigned last_level,
   1257 				  unsigned first_layer, unsigned last_layer)
   1258 {
   1259 	struct si_context *sctx = (struct si_context*)ctx;
   1260 	struct r600_texture *rtex = (struct r600_texture *)tex;
   1261 
   1262 	if (!util_blitter_is_copy_supported(sctx->blitter, tex, tex))
   1263 		return false;
   1264 
   1265 	/* The driver doesn't decompress resources automatically while
   1266 	 * u_blitter is rendering. */
   1267 	vi_disable_dcc_if_incompatible_format(&sctx->b, tex, base_level,
   1268 					      format);
   1269 	si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS,
   1270 				  base_level, first_layer, last_layer);
   1271 
   1272 	/* Clear dirty_level_mask for the levels that will be overwritten. */
   1273 	assert(base_level < last_level);
   1274 	rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1,
   1275 						     last_level - base_level);
   1276 
   1277 	sctx->generate_mipmap_for_depth = rtex->is_depth;
   1278 
   1279 	si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND);
   1280 	util_blitter_generate_mipmap(sctx->blitter, tex, format,
   1281 				     base_level, last_level,
   1282 				     first_layer, last_layer);
   1283 	si_blitter_end(ctx);
   1284 
   1285 	sctx->generate_mipmap_for_depth = false;
   1286 	return true;
   1287 }
   1288 
   1289 static void si_flush_resource(struct pipe_context *ctx,
   1290 			      struct pipe_resource *res)
   1291 {
   1292 	struct r600_texture *rtex = (struct r600_texture*)res;
   1293 
   1294 	assert(res->target != PIPE_BUFFER);
   1295 	assert(!rtex->dcc_separate_buffer || rtex->dcc_gather_statistics);
   1296 
   1297 	/* st/dri calls flush twice per frame (not a bug), this prevents double
   1298 	 * decompression. */
   1299 	if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty)
   1300 		return;
   1301 
   1302 	if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) {
   1303 		si_blit_decompress_color(ctx, rtex, 0, res->last_level,
   1304 					 0, util_max_layer(res, 0),
   1305 					 rtex->dcc_separate_buffer != NULL);
   1306 	}
   1307 
   1308 	/* Always do the analysis even if DCC is disabled at the moment. */
   1309 	if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) {
   1310 		rtex->separate_dcc_dirty = false;
   1311 		vi_separate_dcc_process_and_reset_stats(ctx, rtex);
   1312 	}
   1313 }
   1314 
   1315 static void si_decompress_dcc(struct pipe_context *ctx,
   1316 			      struct r600_texture *rtex)
   1317 {
   1318 	if (!rtex->dcc_offset)
   1319 		return;
   1320 
   1321 	si_blit_decompress_color(ctx, rtex, 0, rtex->resource.b.b.last_level,
   1322 				 0, util_max_layer(&rtex->resource.b.b, 0),
   1323 				 true);
   1324 }
   1325 
   1326 void si_init_blit_functions(struct si_context *sctx)
   1327 {
   1328 	sctx->b.b.resource_copy_region = si_resource_copy_region;
   1329 	sctx->b.b.blit = si_blit;
   1330 	sctx->b.b.flush_resource = si_flush_resource;
   1331 	sctx->b.b.generate_mipmap = si_generate_mipmap;
   1332 	sctx->b.blit_decompress_depth = si_blit_decompress_depth;
   1333 	sctx->b.decompress_dcc = si_decompress_dcc;
   1334 }
   1335