Home | History | Annotate | Download | only in radeonsi
      1 /*
      2  * Copyright 2017 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "si_pipe.h"
     25 #include "sid.h"
     26 
     27 #include "util/u_format.h"
     28 #include "util/u_pack_color.h"
     29 #include "util/u_surface.h"
     30 
     31 enum {
     32 	SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,
     33 	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
     34 };
     35 
     36 static void si_alloc_separate_cmask(struct si_screen *sscreen,
     37 				    struct r600_texture *rtex)
     38 {
     39 	if (rtex->cmask_buffer)
     40                 return;
     41 
     42 	assert(rtex->cmask.size == 0);
     43 
     44 	si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask);
     45 	if (!rtex->cmask.size)
     46 		return;
     47 
     48 	rtex->cmask_buffer = (struct r600_resource *)
     49 		si_aligned_buffer_create(&sscreen->b,
     50 					 R600_RESOURCE_FLAG_UNMAPPABLE,
     51 					 PIPE_USAGE_DEFAULT,
     52 					 rtex->cmask.size,
     53 					 rtex->cmask.alignment);
     54 	if (rtex->cmask_buffer == NULL) {
     55 		rtex->cmask.size = 0;
     56 		return;
     57 	}
     58 
     59 	/* update colorbuffer state bits */
     60 	rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
     61 
     62 	rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
     63 
     64 	p_atomic_inc(&sscreen->compressed_colortex_counter);
     65 }
     66 
     67 static void si_set_clear_color(struct r600_texture *rtex,
     68 			       enum pipe_format surface_format,
     69 			       const union pipe_color_union *color)
     70 {
     71 	union util_color uc;
     72 
     73 	memset(&uc, 0, sizeof(uc));
     74 
     75 	if (rtex->surface.bpe == 16) {
     76 		/* DCC fast clear only:
     77 		 *   CLEAR_WORD0 = R = G = B
     78 		 *   CLEAR_WORD1 = A
     79 		 */
     80 		assert(color->ui[0] == color->ui[1] &&
     81 		       color->ui[0] == color->ui[2]);
     82 		uc.ui[0] = color->ui[0];
     83 		uc.ui[1] = color->ui[3];
     84 	} else if (util_format_is_pure_uint(surface_format)) {
     85 		util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
     86 	} else if (util_format_is_pure_sint(surface_format)) {
     87 		util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
     88 	} else {
     89 		util_pack_color(color->f, surface_format, &uc);
     90 	}
     91 
     92 	memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
     93 }
     94 
     95 static bool vi_get_fast_clear_parameters(enum pipe_format surface_format,
     96 					 const union pipe_color_union *color,
     97 					 uint32_t* reset_value,
     98 					 bool* clear_words_needed)
     99 {
    100 	bool values[4] = {};
    101 	int i;
    102 	bool main_value = false;
    103 	bool extra_value = false;
    104 	int extra_channel;
    105 
    106 	/* This is needed to get the correct DCC clear value for luminance formats.
    107 	 * 1) Get the linear format (because the next step can't handle L8_SRGB).
    108 	 * 2) Convert luminance to red. (the real hw format for luminance)
    109 	 */
    110 	surface_format = util_format_linear(surface_format);
    111 	surface_format = util_format_luminance_to_red(surface_format);
    112 
    113 	const struct util_format_description *desc = util_format_description(surface_format);
    114 
    115 	if (desc->block.bits == 128 &&
    116 	    (color->ui[0] != color->ui[1] ||
    117 	     color->ui[0] != color->ui[2]))
    118 		return false;
    119 
    120 	*clear_words_needed = true;
    121 	*reset_value = 0x20202020U;
    122 
    123 	/* If we want to clear without needing a fast clear eliminate step, we
    124 	 * can set each channel to 0 or 1 (or 0/max for integer formats). We
    125 	 * have two sets of flags, one for the last or first channel(extra) and
    126 	 * one for the other channels(main).
    127 	 */
    128 
    129 	if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
    130 	    surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
    131 	    surface_format == PIPE_FORMAT_B5G6R5_SRGB ||
    132 	    util_format_is_alpha(surface_format)) {
    133 		extra_channel = -1;
    134 	} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
    135 		if (si_translate_colorswap(surface_format, false) <= 1)
    136 			extra_channel = desc->nr_channels - 1;
    137 		else
    138 			extra_channel = 0;
    139 	} else
    140 		return true;
    141 
    142 	for (i = 0; i < 4; ++i) {
    143 		int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
    144 
    145 		if (desc->swizzle[i] < PIPE_SWIZZLE_X ||
    146 		    desc->swizzle[i] > PIPE_SWIZZLE_W)
    147 			continue;
    148 
    149 		if (desc->channel[i].pure_integer &&
    150 		    desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
    151 			/* Use the maximum value for clamping the clear color. */
    152 			int max = u_bit_consecutive(0, desc->channel[i].size - 1);
    153 
    154 			values[i] = color->i[i] != 0;
    155 			if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
    156 				return true;
    157 		} else if (desc->channel[i].pure_integer &&
    158 			   desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
    159 			/* Use the maximum value for clamping the clear color. */
    160 			unsigned max = u_bit_consecutive(0, desc->channel[i].size);
    161 
    162 			values[i] = color->ui[i] != 0U;
    163 			if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
    164 				return true;
    165 		} else {
    166 			values[i] = color->f[i] != 0.0F;
    167 			if (color->f[i] != 0.0F && color->f[i] != 1.0F)
    168 				return true;
    169 		}
    170 
    171 		if (index == extra_channel)
    172 			extra_value = values[i];
    173 		else
    174 			main_value = values[i];
    175 	}
    176 
    177 	for (int i = 0; i < 4; ++i)
    178 		if (values[i] != main_value &&
    179 		    desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
    180 		    desc->swizzle[i] >= PIPE_SWIZZLE_X &&
    181 		    desc->swizzle[i] <= PIPE_SWIZZLE_W)
    182 			return true;
    183 
    184 	*clear_words_needed = false;
    185 	if (main_value)
    186 		*reset_value |= 0x80808080U;
    187 
    188 	if (extra_value)
    189 		*reset_value |= 0x40404040U;
    190 	return true;
    191 }
    192 
    193 void vi_dcc_clear_level(struct si_context *sctx,
    194 			struct r600_texture *rtex,
    195 			unsigned level, unsigned clear_value)
    196 {
    197 	struct pipe_resource *dcc_buffer;
    198 	uint64_t dcc_offset, clear_size;
    199 
    200 	assert(vi_dcc_enabled(rtex, level));
    201 
    202 	if (rtex->dcc_separate_buffer) {
    203 		dcc_buffer = &rtex->dcc_separate_buffer->b.b;
    204 		dcc_offset = 0;
    205 	} else {
    206 		dcc_buffer = &rtex->resource.b.b;
    207 		dcc_offset = rtex->dcc_offset;
    208 	}
    209 
    210 	if (sctx->b.chip_class >= GFX9) {
    211 		/* Mipmap level clears aren't implemented. */
    212 		assert(rtex->resource.b.b.last_level == 0);
    213 		/* MSAA needs a different clear size. */
    214 		assert(rtex->resource.b.b.nr_samples <= 1);
    215 		clear_size = rtex->surface.dcc_size;
    216 	} else {
    217 		unsigned num_layers = util_num_layers(&rtex->resource.b.b, level);
    218 
    219 		/* If this is 0, fast clear isn't possible. (can occur with MSAA) */
    220 		assert(rtex->surface.u.legacy.level[level].dcc_fast_clear_size);
    221 		/* Layered MSAA DCC fast clears need to clear dcc_fast_clear_size
    222 		 * bytes for each layer. This is not currently implemented, and
    223 		 * therefore MSAA DCC isn't even enabled with multiple layers.
    224 		 */
    225 		assert(rtex->resource.b.b.nr_samples <= 1 || num_layers == 1);
    226 
    227 		dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
    228 		clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size *
    229 			     num_layers;
    230 	}
    231 
    232 	si_clear_buffer(&sctx->b.b, dcc_buffer, dcc_offset, clear_size,
    233 			clear_value, R600_COHERENCY_CB_META);
    234 }
    235 
    236 /* Set the same micro tile mode as the destination of the last MSAA resolve.
    237  * This allows hitting the MSAA resolve fast path, which requires that both
    238  * src and dst micro tile modes match.
    239  */
    240 static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen,
    241 					   struct r600_texture *rtex)
    242 {
    243 	if (rtex->resource.b.is_shared ||
    244 	    rtex->resource.b.b.nr_samples <= 1 ||
    245 	    rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
    246 		return;
    247 
    248 	assert(sscreen->info.chip_class >= GFX9 ||
    249 	       rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
    250 	assert(rtex->resource.b.b.last_level == 0);
    251 
    252 	if (sscreen->info.chip_class >= GFX9) {
    253 		/* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
    254 		assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4);
    255 
    256 		/* If you do swizzle_mode % 4, you'll get:
    257 		 *   0 = Depth
    258 		 *   1 = Standard,
    259 		 *   2 = Displayable
    260 		 *   3 = Rotated
    261 		 *
    262 		 * Depth-sample order isn't allowed:
    263 		 */
    264 		assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0);
    265 
    266 		switch (rtex->last_msaa_resolve_target_micro_mode) {
    267 		case RADEON_MICRO_MODE_DISPLAY:
    268 			rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
    269 			rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */
    270 			break;
    271 		case RADEON_MICRO_MODE_THIN:
    272 			rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
    273 			rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */
    274 			break;
    275 		case RADEON_MICRO_MODE_ROTATED:
    276 			rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
    277 			rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */
    278 			break;
    279 		default: /* depth */
    280 			assert(!"unexpected micro mode");
    281 			return;
    282 		}
    283 	} else if (sscreen->info.chip_class >= CIK) {
    284 		/* These magic numbers were copied from addrlib. It doesn't use
    285 		 * any definitions for them either. They are all 2D_TILED_THIN1
    286 		 * modes with different bpp and micro tile mode.
    287 		 */
    288 		switch (rtex->last_msaa_resolve_target_micro_mode) {
    289 		case RADEON_MICRO_MODE_DISPLAY:
    290 			rtex->surface.u.legacy.tiling_index[0] = 10;
    291 			break;
    292 		case RADEON_MICRO_MODE_THIN:
    293 			rtex->surface.u.legacy.tiling_index[0] = 14;
    294 			break;
    295 		case RADEON_MICRO_MODE_ROTATED:
    296 			rtex->surface.u.legacy.tiling_index[0] = 28;
    297 			break;
    298 		default: /* depth, thick */
    299 			assert(!"unexpected micro mode");
    300 			return;
    301 		}
    302 	} else { /* SI */
    303 		switch (rtex->last_msaa_resolve_target_micro_mode) {
    304 		case RADEON_MICRO_MODE_DISPLAY:
    305 			switch (rtex->surface.bpe) {
    306 			case 1:
    307                             rtex->surface.u.legacy.tiling_index[0] = 10;
    308                             break;
    309 			case 2:
    310                             rtex->surface.u.legacy.tiling_index[0] = 11;
    311                             break;
    312 			default: /* 4, 8 */
    313                             rtex->surface.u.legacy.tiling_index[0] = 12;
    314                             break;
    315 			}
    316 			break;
    317 		case RADEON_MICRO_MODE_THIN:
    318 			switch (rtex->surface.bpe) {
    319 			case 1:
    320                                 rtex->surface.u.legacy.tiling_index[0] = 14;
    321                                 break;
    322 			case 2:
    323                                 rtex->surface.u.legacy.tiling_index[0] = 15;
    324                                 break;
    325 			case 4:
    326                                 rtex->surface.u.legacy.tiling_index[0] = 16;
    327                                 break;
    328 			default: /* 8, 16 */
    329                                 rtex->surface.u.legacy.tiling_index[0] = 17;
    330                                 break;
    331 			}
    332 			break;
    333 		default: /* depth, thick */
    334 			assert(!"unexpected micro mode");
    335 			return;
    336 		}
    337 	}
    338 
    339 	rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
    340 
    341 	p_atomic_inc(&sscreen->dirty_tex_counter);
    342 }
    343 
    344 static void si_do_fast_color_clear(struct si_context *sctx,
    345 				   unsigned *buffers,
    346 				   const union pipe_color_union *color)
    347 {
    348 	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
    349 	int i;
    350 
    351 	/* This function is broken in BE, so just disable this path for now */
    352 #ifdef PIPE_ARCH_BIG_ENDIAN
    353 	return;
    354 #endif
    355 
    356 	if (sctx->b.render_cond)
    357 		return;
    358 
    359 	for (i = 0; i < fb->nr_cbufs; i++) {
    360 		struct r600_texture *tex;
    361 		unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
    362 
    363 		if (!fb->cbufs[i])
    364 			continue;
    365 
    366 		/* if this colorbuffer is not being cleared */
    367 		if (!(*buffers & clear_bit))
    368 			continue;
    369 
    370 		unsigned level = fb->cbufs[i]->u.tex.level;
    371 		tex = (struct r600_texture *)fb->cbufs[i]->texture;
    372 
    373 		/* the clear is allowed if all layers are bound */
    374 		if (fb->cbufs[i]->u.tex.first_layer != 0 ||
    375 		    fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
    376 			continue;
    377 		}
    378 
    379 		/* cannot clear mipmapped textures */
    380 		if (fb->cbufs[i]->texture->last_level != 0) {
    381 			continue;
    382 		}
    383 
    384 		/* only supported on tiled surfaces */
    385 		if (tex->surface.is_linear) {
    386 			continue;
    387 		}
    388 
    389 		/* shared textures can't use fast clear without an explicit flush,
    390 		 * because there is no way to communicate the clear color among
    391 		 * all clients
    392 		 */
    393 		if (tex->resource.b.is_shared &&
    394 		    !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
    395 			continue;
    396 
    397 		/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
    398 		if (sctx->b.chip_class == CIK &&
    399 		    tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
    400 		    sctx->screen->info.drm_major == 2 &&
    401 		    sctx->screen->info.drm_minor < 38) {
    402 			continue;
    403 		}
    404 
    405 		/* Fast clear is the most appropriate place to enable DCC for
    406 		 * displayable surfaces.
    407 		 */
    408 		if (sctx->b.chip_class >= VI &&
    409 		    !(sctx->screen->debug_flags & DBG(NO_DCC_FB))) {
    410 			vi_separate_dcc_try_enable(&sctx->b, tex);
    411 
    412 			/* RB+ isn't supported with a CMASK clear only on Stoney,
    413 			 * so all clears are considered to be hypothetically slow
    414 			 * clears, which is weighed when determining whether to
    415 			 * enable separate DCC.
    416 			 */
    417 			if (tex->dcc_gather_statistics &&
    418 			    sctx->b.family == CHIP_STONEY)
    419 				tex->num_slow_clears++;
    420 		}
    421 
    422 		bool need_decompress_pass = false;
    423 
    424 		/* Use a slow clear for small surfaces where the cost of
    425 		 * the eliminate pass can be higher than the benefit of fast
    426 		 * clear. The closed driver does this, but the numbers may differ.
    427 		 *
    428 		 * This helps on both dGPUs and APUs, even small APUs like Mullins.
    429 		 */
    430 		bool too_small = tex->resource.b.b.nr_samples <= 1 &&
    431 				 tex->resource.b.b.width0 *
    432 				 tex->resource.b.b.height0 <= 512 * 512;
    433 
    434 		/* Try to clear DCC first, otherwise try CMASK. */
    435 		if (vi_dcc_enabled(tex, 0)) {
    436 			uint32_t reset_value;
    437 			bool clear_words_needed;
    438 
    439 			if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
    440 				continue;
    441 
    442 			/* This can only occur with MSAA. */
    443 			if (sctx->b.chip_class == VI &&
    444 			    !tex->surface.u.legacy.level[level].dcc_fast_clear_size)
    445 				continue;
    446 
    447 			if (!vi_get_fast_clear_parameters(fb->cbufs[i]->format,
    448 							  color, &reset_value,
    449 							  &clear_words_needed))
    450 				continue;
    451 
    452 			if (clear_words_needed && too_small)
    453 				continue;
    454 
    455 			/* DCC fast clear with MSAA should clear CMASK to 0xC. */
    456 			if (tex->resource.b.b.nr_samples >= 2 && tex->cmask.size) {
    457 				/* TODO: This doesn't work with MSAA. */
    458 				if (clear_words_needed)
    459 					continue;
    460 
    461 				si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
    462 						tex->cmask.offset, tex->cmask.size,
    463 						0xCCCCCCCC, R600_COHERENCY_CB_META);
    464 				need_decompress_pass = true;
    465 			}
    466 
    467 			vi_dcc_clear_level(sctx, tex, 0, reset_value);
    468 
    469 			if (clear_words_needed)
    470 				need_decompress_pass = true;
    471 
    472 			tex->separate_dcc_dirty = true;
    473 		} else {
    474 			if (too_small)
    475 				continue;
    476 
    477 			/* 128-bit formats are unusupported */
    478 			if (tex->surface.bpe > 8) {
    479 				continue;
    480 			}
    481 
    482 			/* RB+ doesn't work with CMASK fast clear on Stoney. */
    483 			if (sctx->b.family == CHIP_STONEY)
    484 				continue;
    485 
    486 			/* ensure CMASK is enabled */
    487 			si_alloc_separate_cmask(sctx->screen, tex);
    488 			if (tex->cmask.size == 0) {
    489 				continue;
    490 			}
    491 
    492 			/* Do the fast clear. */
    493 			si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
    494 					tex->cmask.offset, tex->cmask.size, 0,
    495 					R600_COHERENCY_CB_META);
    496 			need_decompress_pass = true;
    497 		}
    498 
    499 		if (need_decompress_pass &&
    500 		    !(tex->dirty_level_mask & (1 << level))) {
    501 			tex->dirty_level_mask |= 1 << level;
    502 			p_atomic_inc(&sctx->screen->compressed_colortex_counter);
    503 		}
    504 
    505 		/* We can change the micro tile mode before a full clear. */
    506 		si_set_optimal_micro_tile_mode(sctx->screen, tex);
    507 
    508 		si_set_clear_color(tex, fb->cbufs[i]->format, color);
    509 
    510 		sctx->framebuffer.dirty_cbufs |= 1 << i;
    511 		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
    512 		*buffers &= ~clear_bit;
    513 	}
    514 }
    515 
    516 static void si_clear(struct pipe_context *ctx, unsigned buffers,
    517 		     const union pipe_color_union *color,
    518 		     double depth, unsigned stencil)
    519 {
    520 	struct si_context *sctx = (struct si_context *)ctx;
    521 	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
    522 	struct pipe_surface *zsbuf = fb->zsbuf;
    523 	struct r600_texture *zstex =
    524 		zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
    525 
    526 	if (buffers & PIPE_CLEAR_COLOR) {
    527 		si_do_fast_color_clear(sctx, &buffers, color);
    528 		if (!buffers)
    529 			return; /* all buffers have been fast cleared */
    530 	}
    531 
    532 	if (buffers & PIPE_CLEAR_COLOR) {
    533 		int i;
    534 
    535 		/* These buffers cannot use fast clear, make sure to disable expansion. */
    536 		for (i = 0; i < fb->nr_cbufs; i++) {
    537 			struct r600_texture *tex;
    538 
    539 			/* If not clearing this buffer, skip. */
    540 			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
    541 				continue;
    542 
    543 			if (!fb->cbufs[i])
    544 				continue;
    545 
    546 			tex = (struct r600_texture *)fb->cbufs[i]->texture;
    547 			if (tex->fmask.size == 0)
    548 				tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
    549 		}
    550 	}
    551 
    552 	if (zstex &&
    553 	    si_htile_enabled(zstex, zsbuf->u.tex.level) &&
    554 	    zsbuf->u.tex.first_layer == 0 &&
    555 	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
    556 		/* TC-compatible HTILE only supports depth clears to 0 or 1. */
    557 		if (buffers & PIPE_CLEAR_DEPTH &&
    558 		    (!zstex->tc_compatible_htile ||
    559 		     depth == 0 || depth == 1)) {
    560 			/* Need to disable EXPCLEAR temporarily if clearing
    561 			 * to a new value. */
    562 			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
    563 				sctx->db_depth_disable_expclear = true;
    564 			}
    565 
    566 			zstex->depth_clear_value = depth;
    567 			sctx->framebuffer.dirty_zsbuf = true;
    568 			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
    569 			sctx->db_depth_clear = true;
    570 			si_mark_atom_dirty(sctx, &sctx->db_render_state);
    571 		}
    572 
    573 		/* TC-compatible HTILE only supports stencil clears to 0. */
    574 		if (buffers & PIPE_CLEAR_STENCIL &&
    575 		    (!zstex->tc_compatible_htile || stencil == 0)) {
    576 			stencil &= 0xff;
    577 
    578 			/* Need to disable EXPCLEAR temporarily if clearing
    579 			 * to a new value. */
    580 			if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
    581 				sctx->db_stencil_disable_expclear = true;
    582 			}
    583 
    584 			zstex->stencil_clear_value = stencil;
    585 			sctx->framebuffer.dirty_zsbuf = true;
    586 			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
    587 			sctx->db_stencil_clear = true;
    588 			si_mark_atom_dirty(sctx, &sctx->db_render_state);
    589 		}
    590 
    591 		/* TODO: Find out what's wrong here. Fast depth clear leads to
    592 		 * corruption in ARK: Survival Evolved, but that may just be
    593 		 * a coincidence and the root cause is elsewhere.
    594 		 *
    595 		 * The corruption can be fixed by putting the DB flush before
    596 		 * or after the depth clear. (surprisingly)
    597 		 *
    598 		 * https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
    599 		 *
    600 		 * This hack decreases back-to-back ClearDepth performance.
    601 		 */
    602 		if (sctx->screen->clear_db_cache_before_clear) {
    603 			sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
    604 		}
    605 	}
    606 
    607 	si_blitter_begin(ctx, SI_CLEAR);
    608 	util_blitter_clear(sctx->blitter, fb->width, fb->height,
    609 			   util_framebuffer_get_num_layers(fb),
    610 			   buffers, color, depth, stencil);
    611 	si_blitter_end(ctx);
    612 
    613 	if (sctx->db_depth_clear) {
    614 		sctx->db_depth_clear = false;
    615 		sctx->db_depth_disable_expclear = false;
    616 		zstex->depth_cleared = true;
    617 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
    618 	}
    619 
    620 	if (sctx->db_stencil_clear) {
    621 		sctx->db_stencil_clear = false;
    622 		sctx->db_stencil_disable_expclear = false;
    623 		zstex->stencil_cleared = true;
    624 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
    625 	}
    626 }
    627 
    628 static void si_clear_render_target(struct pipe_context *ctx,
    629 				   struct pipe_surface *dst,
    630 				   const union pipe_color_union *color,
    631 				   unsigned dstx, unsigned dsty,
    632 				   unsigned width, unsigned height,
    633 				   bool render_condition_enabled)
    634 {
    635 	struct si_context *sctx = (struct si_context *)ctx;
    636 
    637 	si_blitter_begin(ctx, SI_CLEAR_SURFACE |
    638 			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
    639 	util_blitter_clear_render_target(sctx->blitter, dst, color,
    640 					 dstx, dsty, width, height);
    641 	si_blitter_end(ctx);
    642 }
    643 
    644 static void si_clear_depth_stencil(struct pipe_context *ctx,
    645 				   struct pipe_surface *dst,
    646 				   unsigned clear_flags,
    647 				   double depth,
    648 				   unsigned stencil,
    649 				   unsigned dstx, unsigned dsty,
    650 				   unsigned width, unsigned height,
    651 				   bool render_condition_enabled)
    652 {
    653 	struct si_context *sctx = (struct si_context *)ctx;
    654 
    655 	si_blitter_begin(ctx, SI_CLEAR_SURFACE |
    656 			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
    657 	util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil,
    658 					 dstx, dsty, width, height);
    659 	si_blitter_end(ctx);
    660 }
    661 
    662 static void si_clear_texture(struct pipe_context *pipe,
    663 			     struct pipe_resource *tex,
    664 			     unsigned level,
    665 			     const struct pipe_box *box,
    666 			     const void *data)
    667 {
    668 	struct pipe_screen *screen = pipe->screen;
    669 	struct r600_texture *rtex = (struct r600_texture*)tex;
    670 	struct pipe_surface tmpl = {{0}};
    671 	struct pipe_surface *sf;
    672 	const struct util_format_description *desc =
    673 		util_format_description(tex->format);
    674 
    675 	tmpl.format = tex->format;
    676 	tmpl.u.tex.first_layer = box->z;
    677 	tmpl.u.tex.last_layer = box->z + box->depth - 1;
    678 	tmpl.u.tex.level = level;
    679 	sf = pipe->create_surface(pipe, tex, &tmpl);
    680 	if (!sf)
    681 		return;
    682 
    683 	if (rtex->is_depth) {
    684 		unsigned clear;
    685 		float depth;
    686 		uint8_t stencil = 0;
    687 
    688 		/* Depth is always present. */
    689 		clear = PIPE_CLEAR_DEPTH;
    690 		desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
    691 
    692 		if (rtex->surface.has_stencil) {
    693 			clear |= PIPE_CLEAR_STENCIL;
    694 			desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
    695 		}
    696 
    697 		si_clear_depth_stencil(pipe, sf, clear, depth, stencil,
    698 				       box->x, box->y,
    699 				       box->width, box->height, false);
    700 	} else {
    701 		union pipe_color_union color;
    702 
    703 		/* pipe_color_union requires the full vec4 representation. */
    704 		if (util_format_is_pure_uint(tex->format))
    705 			desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
    706 		else if (util_format_is_pure_sint(tex->format))
    707 			desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
    708 		else
    709 			desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
    710 
    711 		if (screen->is_format_supported(screen, tex->format,
    712 						tex->target, 0,
    713 						PIPE_BIND_RENDER_TARGET)) {
    714 			si_clear_render_target(pipe, sf, &color,
    715 					       box->x, box->y,
    716 					       box->width, box->height, false);
    717 		} else {
    718 			/* Software fallback - just for R9G9B9E5_FLOAT */
    719 			util_clear_render_target(pipe, sf, &color,
    720 						 box->x, box->y,
    721 						 box->width, box->height);
    722 		}
    723 	}
    724 	pipe_surface_reference(&sf, NULL);
    725 }
    726 
    727 void si_init_clear_functions(struct si_context *sctx)
    728 {
    729 	sctx->b.b.clear = si_clear;
    730 	sctx->b.b.clear_render_target = si_clear_render_target;
    731 	sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
    732 	sctx->b.b.clear_texture = si_clear_texture;
    733 }
    734