/* freedreno_gmem.c — GMEM (on-chip tile buffer) and tiling handling for freedreno */
      1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
      2 
      3 /*
      4  * Copyright (C) 2012 Rob Clark <robclark (at) freedesktop.org>
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     23  * SOFTWARE.
     24  *
     25  * Authors:
     26  *    Rob Clark <robclark (at) freedesktop.org>
     27  */
     28 
     29 #include "pipe/p_state.h"
     30 #include "util/u_string.h"
     31 #include "util/u_memory.h"
     32 #include "util/u_inlines.h"
     33 #include "util/u_format.h"
     34 
     35 #include "freedreno_gmem.h"
     36 #include "freedreno_context.h"
     37 #include "freedreno_fence.h"
     38 #include "freedreno_resource.h"
     39 #include "freedreno_query_hw.h"
     40 #include "freedreno_util.h"
     41 
     42 /*
     43  * GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer
     44  * inside the GPU.  All rendering happens to GMEM.  Larger render targets
     45  * are split into tiles that are small enough for the color (and depth and/or
     46  * stencil, if enabled) buffers to fit within GMEM.  Before rendering a tile,
     47  * if there was not a clear invalidating the previous tile contents, we need
     48  * to restore the previous tiles contents (system mem -> GMEM), and after all
     49  * the draw calls, before moving to the next tile, we need to save the tile
     50  * contents (GMEM -> system mem).
     51  *
     52  * The code in this file handles dealing with GMEM and tiling.
     53  *
     54  * The structure of the ringbuffer ends up being:
     55  *
     56  *     +--<---<-- IB ---<---+---<---+---<---<---<--+
     57  *     |                    |       |              |
     58  *     v                    ^       ^              ^
     59  *   ------------------------------------------------------
     60  *     | clear/draw cmds | Tile0 | Tile1 | .... | TileN |
     61  *   ------------------------------------------------------
     62  *                       ^
     63  *                       |
     64  *                       address submitted in issueibcmds
     65  *
     66  * Where the per-tile section handles scissor setup, mem2gmem restore (if
     67  * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem
     68  * resolve.
     69  */
     70 
     71 static uint32_t bin_width(struct fd_screen *screen)
     72 {
     73 	if (is_a4xx(screen) || is_a5xx(screen))
     74 		return 1024;
     75 	if (is_a3xx(screen))
     76 		return 992;
     77 	return 512;
     78 }
     79 
     80 static uint32_t
     81 total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
     82 		   uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem)
     83 {
     84 	uint32_t total = 0, i;
     85 
     86 	for (i = 0; i < MAX_RENDER_TARGETS; i++) {
     87 		if (cbuf_cpp[i]) {
     88 			gmem->cbuf_base[i] = align(total, 0x4000);
     89 			total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
     90 		}
     91 	}
     92 
     93 	if (zsbuf_cpp[0]) {
     94 		gmem->zsbuf_base[0] = align(total, 0x4000);
     95 		total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h;
     96 	}
     97 
     98 	if (zsbuf_cpp[1]) {
     99 		gmem->zsbuf_base[1] = align(total, 0x4000);
    100 		total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h;
    101 	}
    102 
    103 	return total;
    104 }
    105 
/* Compute the tiling/binning configuration for the current batch:
 * fills in ctx->gmem (bin dimensions/counts, per-buffer gmem base
 * offsets), ctx->vsc_pipe[] (visibility stream pipe assignments) and
 * ctx->tile[] (per-tile position/size/pipe).  The result is memoized
 * on (cbuf cpp's, zsbuf cpp's, scissor); if none of those changed
 * since the last call, the cached state is reused.
 */
static void
calculate_tiles(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct pipe_scissor_state *scissor = &batch->max_scissor;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	const uint32_t gmem_alignw = ctx->screen->gmem_alignw;
	const uint32_t gmem_alignh = ctx->screen->gmem_alignh;
	const unsigned npipes = ctx->screen->num_vsc_pipes;
	const uint32_t gmem_size = ctx->screen->gmemsize_bytes;
	uint32_t minx, miny, width, height;
	uint32_t nbins_x = 1, nbins_y = 1;
	uint32_t bin_w, bin_h;
	uint32_t max_width = bin_width(ctx->screen);
	uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0};
	uint32_t i, j, t, xoff, yoff;
	uint32_t tpp_x, tpp_y;
	bool has_zs = !!(batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
	int tile_n[npipes];

	/* Gather bytes-per-pixel for each buffer that needs gmem space.
	 * Depth/stencil only counts if it is actually resolved:
	 */
	if (has_zs) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
		zsbuf_cpp[0] = rsc->cpp;
		if (rsc->stencil)
			zsbuf_cpp[1] = rsc->stencil->cpp;
	}
	for (i = 0; i < pfb->nr_cbufs; i++) {
		if (pfb->cbufs[i])
			cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format);
		else
			cbuf_cpp[i] = 4;  /* no surface bound, assume 32bpp */
	}

	/* memoization: skip recompute if nothing relevant changed: */
	if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) &&
		!memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) &&
		!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
		/* everything is up-to-date */
		return;
	}

	/* The area to tile is the scissored bounding box of all draws,
	 * unless scissor optimization is disabled for debug:
	 */
	if (fd_mesa_debug & FD_DBG_NOSCIS) {
		minx = 0;
		miny = 0;
		width = pfb->width;
		height = pfb->height;
	} else {
		/* round down to multiple of alignment: */
		minx = scissor->minx & ~(gmem_alignw - 1);
		miny = scissor->miny & ~(gmem_alignh - 1);
		width = scissor->maxx - minx;
		height = scissor->maxy - miny;
	}

	bin_w = align(width, gmem_alignw);
	bin_h = align(height, gmem_alignh);

	/* first, find a bin width that satisfies the maximum width
	 * restrictions:
	 */
	while (bin_w > max_width) {
		nbins_x++;
		bin_w = align(width / nbins_x, gmem_alignw);
	}

	if (fd_mesa_debug & FD_DBG_MSGS) {
		debug_printf("binning input: cbuf cpp:");
		for (i = 0; i < pfb->nr_cbufs; i++)
			debug_printf(" %d", cbuf_cpp[i]);
		debug_printf(", zsbuf cpp: %d; %dx%d\n",
				zsbuf_cpp[0], width, height);
	}

	/* then find a bin width/height that satisfies the memory
	 * constraints:
	 */
	while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
		/* split along the longer axis to keep bins roughly square: */
		if (bin_w > bin_h) {
			nbins_x++;
			bin_w = align(width / nbins_x, gmem_alignw);
		} else {
			nbins_y++;
			bin_h = align(height / nbins_y, gmem_alignh);
		}
	}

	DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h);

	/* commit the new configuration (this also updates the memoization key): */
	gmem->scissor = *scissor;
	memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp));
	memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp));
	gmem->bin_h = bin_h;
	gmem->bin_w = bin_w;
	gmem->nbins_x = nbins_x;
	gmem->nbins_y = nbins_y;
	gmem->minx = minx;
	gmem->miny = miny;
	gmem->width = width;
	gmem->height = height;

	/*
	 * Assign tiles and pipes:
	 *
	 * At some point it might be worth playing with different
	 * strategies and seeing if that makes much impact on
	 * performance.
	 */

#define div_round_up(v, a)  (((v) + (a) - 1) / (a))
	/* figure out number of tiles per pipe, growing until the bin grid
	 * fits in the (at most 8x8-pipe) visibility stream layout: */
	tpp_x = tpp_y = 1;
	while (div_round_up(nbins_y, tpp_y) > 8)
		tpp_y += 2;
	while ((div_round_up(nbins_y, tpp_y) *
			div_round_up(nbins_x, tpp_x)) > 8)
		tpp_x += 1;

	gmem->maxpw = tpp_x;
	gmem->maxph = tpp_y;

	/* configure pipes: walk the bin grid row-major, tpp_x bins at a
	 * time, assigning each pipe a rectangle of bins: */
	xoff = yoff = 0;
	for (i = 0; i < npipes; i++) {
		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];

		if (xoff >= nbins_x) {
			xoff = 0;
			yoff += tpp_y;
		}

		if (yoff >= nbins_y) {
			break;
		}

		pipe->x = xoff;
		pipe->y = yoff;
		/* clamp at grid edge so the last pipe in a row/column is smaller: */
		pipe->w = MIN2(tpp_x, nbins_x - xoff);
		pipe->h = MIN2(tpp_y, nbins_y - yoff);

		xoff += tpp_x;
	}

	/* zero out any remaining (unused) pipes: */
	for (; i < npipes; i++) {
		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
		pipe->x = pipe->y = pipe->w = pipe->h = 0;
	}

#if 0 /* debug */
	printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y);
	for (i = 0; i < 8; i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		printf("pipe[%d]: %ux%u @ %u,%u\n", i,
				pipe->w, pipe->h, pipe->x, pipe->y);
	}
#endif

	/* configure tiles: row-major walk of the bin grid, clipping the
	 * right-most/bottom-most tiles to the render area: */
	t = 0;
	yoff = miny;
	memset(tile_n, 0, sizeof(tile_n));
	for (i = 0; i < nbins_y; i++) {
		uint32_t bw, bh;

		xoff = minx;

		/* clip bin height: */
		bh = MIN2(bin_h, miny + height - yoff);

		for (j = 0; j < nbins_x; j++) {
			struct fd_tile *tile = &ctx->tile[t];
			uint32_t p;

			assert(t < ARRAY_SIZE(ctx->tile));

			/* pipe number: */
			p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);

			/* clip bin width: */
			bw = MIN2(bin_w, minx + width - xoff);

			/* tile->n is this tile's slot within its pipe: */
			tile->n = tile_n[p]++;
			tile->p = p;
			tile->bin_w = bw;
			tile->bin_h = bh;
			tile->xoff = xoff;
			tile->yoff = yoff;

			t++;

			xoff += bw;
		}

		yoff += bh;
	}

#if 0 /* debug */
	t = 0;
	for (i = 0; i < nbins_y; i++) {
		for (j = 0; j < nbins_x; j++) {
			struct fd_tile *tile = &ctx->tile[t++];
			printf("|p:%u n:%u|", tile->p, tile->n);
		}
		printf("\n");
	}
#endif
}
    312 
    313 static void
    314 render_tiles(struct fd_batch *batch)
    315 {
    316 	struct fd_context *ctx = batch->ctx;
    317 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
    318 	int i;
    319 
    320 	ctx->emit_tile_init(batch);
    321 
    322 	if (batch->restore)
    323 		ctx->stats.batch_restore++;
    324 
    325 	for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) {
    326 		struct fd_tile *tile = &ctx->tile[i];
    327 
    328 		DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
    329 			tile->bin_h, tile->yoff, tile->bin_w, tile->xoff);
    330 
    331 		ctx->emit_tile_prep(batch, tile);
    332 
    333 		if (batch->restore) {
    334 			ctx->emit_tile_mem2gmem(batch, tile);
    335 		}
    336 
    337 		ctx->emit_tile_renderprep(batch, tile);
    338 
    339 		if (ctx->query_prepare_tile)
    340 			ctx->query_prepare_tile(batch, i, batch->gmem);
    341 
    342 		/* emit IB to drawcmds: */
    343 		ctx->emit_ib(batch->gmem, batch->draw);
    344 		fd_reset_wfi(batch);
    345 
    346 		/* emit gmem2mem to transfer tile back to system memory: */
    347 		ctx->emit_tile_gmem2mem(batch, tile);
    348 	}
    349 
    350 	if (ctx->emit_tile_fini)
    351 		ctx->emit_tile_fini(batch);
    352 }
    353 
    354 static void
    355 render_sysmem(struct fd_batch *batch)
    356 {
    357 	struct fd_context *ctx = batch->ctx;
    358 
    359 	ctx->emit_sysmem_prep(batch);
    360 
    361 	if (ctx->query_prepare_tile)
    362 		ctx->query_prepare_tile(batch, 0, batch->gmem);
    363 
    364 	/* emit IB to drawcmds: */
    365 	ctx->emit_ib(batch->gmem, batch->draw);
    366 	fd_reset_wfi(batch);
    367 
    368 	if (ctx->emit_sysmem_fini)
    369 		ctx->emit_sysmem_fini(batch);
    370 }
    371 
    372 static void
    373 flush_ring(struct fd_batch *batch)
    374 {
    375 	/* for compute/blit batch, there is no batch->gmem, only batch->draw: */
    376 	struct fd_ringbuffer *ring = batch->nondraw ? batch->draw : batch->gmem;
    377 	uint32_t timestamp;
    378 	int out_fence_fd = -1;
    379 
    380 	fd_ringbuffer_flush2(ring, batch->in_fence_fd,
    381 			batch->needs_out_fence_fd ? &out_fence_fd : NULL);
    382 
    383 	timestamp = fd_ringbuffer_timestamp(ring);
    384 	fd_fence_populate(batch->fence, timestamp, out_fence_fd);
    385 }
    386 
    387 void
    388 fd_gmem_render_tiles(struct fd_batch *batch)
    389 {
    390 	struct fd_context *ctx = batch->ctx;
    391 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    392 	bool sysmem = false;
    393 
    394 	if (ctx->emit_sysmem_prep && !batch->nondraw) {
    395 		if (batch->cleared || batch->gmem_reason ||
    396 				((batch->num_draws > 5) && !batch->blit)) {
    397 			DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u",
    398 				batch->cleared, batch->gmem_reason, batch->num_draws);
    399 		} else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) {
    400 			sysmem = true;
    401 		}
    402 
    403 		/* For ARB_framebuffer_no_attachments: */
    404 		if ((pfb->nr_cbufs == 0) && !pfb->zsbuf) {
    405 			sysmem = true;
    406 		}
    407 	}
    408 
    409 	fd_reset_wfi(batch);
    410 
    411 	ctx->stats.batch_total++;
    412 
    413 	if (batch->nondraw) {
    414 		DBG("%p: rendering non-draw", batch);
    415 		ctx->stats.batch_nondraw++;
    416 	} else if (sysmem) {
    417 		DBG("%p: rendering sysmem %ux%u (%s/%s)",
    418 			batch, pfb->width, pfb->height,
    419 			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
    420 			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
    421 		if (ctx->query_prepare)
    422 			ctx->query_prepare(batch, 1);
    423 		render_sysmem(batch);
    424 		ctx->stats.batch_sysmem++;
    425 	} else {
    426 		struct fd_gmem_stateobj *gmem = &ctx->gmem;
    427 		calculate_tiles(batch);
    428 		DBG("%p: rendering %dx%d tiles %ux%u (%s/%s)",
    429 			batch, pfb->width, pfb->height, gmem->nbins_x, gmem->nbins_y,
    430 			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
    431 			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
    432 		if (ctx->query_prepare)
    433 			ctx->query_prepare(batch, gmem->nbins_x * gmem->nbins_y);
    434 		render_tiles(batch);
    435 		ctx->stats.batch_gmem++;
    436 	}
    437 
    438 	flush_ring(batch);
    439 }
    440 
    441 /* special case for when we need to create a fence but have no rendering
    442  * to flush.. just emit a no-op string-marker packet.
    443  */
    444 void
    445 fd_gmem_render_noop(struct fd_batch *batch)
    446 {
    447 	struct fd_context *ctx = batch->ctx;
    448 	struct pipe_context *pctx = &ctx->base;
    449 
    450 	pctx->emit_string_marker(pctx, "noop", 4);
    451 	/* emit IB to drawcmds (which contain the string marker): */
    452 	ctx->emit_ib(batch->gmem, batch->draw);
    453 	flush_ring(batch);
    454 }
    455 
    456 /* tile needs restore if it isn't completely contained within the
    457  * cleared scissor:
    458  */
    459 static bool
    460 skip_restore(struct pipe_scissor_state *scissor, struct fd_tile *tile)
    461 {
    462 	unsigned minx = tile->xoff;
    463 	unsigned maxx = tile->xoff + tile->bin_w;
    464 	unsigned miny = tile->yoff;
    465 	unsigned maxy = tile->yoff + tile->bin_h;
    466 	return (minx >= scissor->minx) && (maxx <= scissor->maxx) &&
    467 			(miny >= scissor->miny) && (maxy <= scissor->maxy);
    468 }
    469 
    470 /* When deciding whether a tile needs mem2gmem, we need to take into
    471  * account the scissor rect(s) that were cleared.  To simplify we only
    472  * consider the last scissor rect for each buffer, since the common
    473  * case would be a single clear.
    474  */
    475 bool
    476 fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile,
    477 		uint32_t buffers)
    478 {
    479 	if (!(batch->restore & buffers))
    480 		return false;
    481 
    482 	/* if buffers partially cleared, then slow-path to figure out
    483 	 * if this particular tile needs restoring:
    484 	 */
    485 	if ((buffers & FD_BUFFER_COLOR) &&
    486 			(batch->partial_cleared & FD_BUFFER_COLOR) &&
    487 			skip_restore(&batch->cleared_scissor.color, tile))
    488 		return false;
    489 	if ((buffers & FD_BUFFER_DEPTH) &&
    490 			(batch->partial_cleared & FD_BUFFER_DEPTH) &&
    491 			skip_restore(&batch->cleared_scissor.depth, tile))
    492 		return false;
    493 	if ((buffers & FD_BUFFER_STENCIL) &&
    494 			(batch->partial_cleared & FD_BUFFER_STENCIL) &&
    495 			skip_restore(&batch->cleared_scissor.stencil, tile))
    496 		return false;
    497 
    498 	return true;
    499 }
    500