/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"

#include "freedreno_gmem.h"
#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

/*
 * GMEM is the small (i.e. 256KiB for a200, 512KiB for a220, etc.) tile buffer
 * inside the GPU.  All rendering happens to GMEM.  Larger render targets
 * are split into tiles that are small enough for the color (and depth and/or
 * stencil, if enabled) buffers to fit within GMEM.  Before rendering a tile,
 * unless a clear invalidated the previous tile contents, we need to restore
 * the previous tile's contents (system mem -> GMEM), and after all the draw
 * calls, before moving to the next tile, we need to save the tile contents
 * (GMEM -> system mem).
 *
 * The code in this file handles dealing with GMEM and tiling.
 *
 * The structure of the ringbuffer ends up being:
 *
 *     +--<---<-- IB ---<---+---<---+---<---<---<--+
 *     |                    |       |              |
 *     v                    ^       ^              ^
 *   ------------------------------------------------------
 *     | clear/draw cmds | Tile0 | Tile1 | .... | TileN |
 *   ------------------------------------------------------
 *                       ^
 *                       |
 *                       address submitted in issueibcmds
 *
 * Where the per-tile section handles scissor setup, mem2gmem restore (if
 * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem
 * resolve.
 */
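
/*
 * Rough worked example (illustrative numbers, not tied to a specific
 * part): a 1920x1080 ARGB8888 target needs 1920*1080*4 bytes (~7.9MiB)
 * for color alone, far more than a 512KiB GMEM, so it must be binned.
 * With 256x256 bins, color needs 256*256*4 = 256KiB of GMEM per bin,
 * leaving room for a 4 byte/pixel depth buffer (another 256KiB) within
 * the 512KiB budget.
 */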

static uint32_t bin_width(struct fd_screen *screen)
{
	if (is_a4xx(screen) || is_a5xx(screen))
		return 1024;
	if (is_a3xx(screen))
		return 992;
	return 512;
}

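/* Compute the total GMEM footprint (in bytes) of one bin: each enabled
 * color buffer, then depth and (if separate) stencil, are laid out with
 * their base offsets aligned to 0x4000, and those offsets are recorded
 * in the gmem state object as a side effect:
 */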
static uint32_t
total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
		   uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem)
{
	uint32_t total = 0, i;

	for (i = 0; i < MAX_RENDER_TARGETS; i++) {
		if (cbuf_cpp[i]) {
			gmem->cbuf_base[i] = align(total, 0x4000);
			total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
		}
	}

	if (zsbuf_cpp[0]) {
		gmem->zsbuf_base[0] = align(total, 0x4000);
		total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h;
	}

	if (zsbuf_cpp[1]) {
		gmem->zsbuf_base[1] = align(total, 0x4000);
		total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h;
	}

	return total;
}

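/* Work out the bin (tile) dimensions and the tile->pipe assignment for
 * the current framebuffer and scissor, filling in ctx->gmem, ctx->pipe[]
 * and ctx->tile[].  The result is cached: if neither the per-pixel sizes
 * nor the scissor changed since the last call, the existing state is
 * kept:
 */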
static void
calculate_tiles(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct pipe_scissor_state *scissor = &batch->max_scissor;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	const uint32_t gmem_alignw = ctx->screen->gmem_alignw;
	const uint32_t gmem_alignh = ctx->screen->gmem_alignh;
	const uint32_t gmem_size = ctx->screen->gmemsize_bytes;
	uint32_t minx, miny, width, height;
	uint32_t nbins_x = 1, nbins_y = 1;
	uint32_t bin_w, bin_h;
	uint32_t max_width = bin_width(ctx->screen);
	uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0};
	uint32_t i, j, t, xoff, yoff;
	uint32_t tpp_x, tpp_y;
	bool has_zs = !!(batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
	int tile_n[ARRAY_SIZE(ctx->pipe)];

	if (has_zs) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
		zsbuf_cpp[0] = rsc->cpp;
		if (rsc->stencil)
			zsbuf_cpp[1] = rsc->stencil->cpp;
	}
	for (i = 0; i < pfb->nr_cbufs; i++) {
		if (pfb->cbufs[i])
			cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format);
		else
			cbuf_cpp[i] = 4;
	}

	if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) &&
		!memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) &&
		!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
		/* everything is up-to-date */
		return;
	}

	if (fd_mesa_debug & FD_DBG_NOSCIS) {
		minx = 0;
		miny = 0;
		width = pfb->width;
		height = pfb->height;
	} else {
		/* round down to multiple of alignment: */
		minx = scissor->minx & ~(gmem_alignw - 1);
		miny = scissor->miny & ~(gmem_alignh - 1);
		width = scissor->maxx - minx;
		height = scissor->maxy - miny;
	}

	bin_w = align(width, gmem_alignw);
	bin_h = align(height, gmem_alignh);

	/* first, find a bin width that satisfies the maximum width
	 * restrictions:
	 */
	while (bin_w > max_width) {
		nbins_x++;
		bin_w = align(width / nbins_x, gmem_alignw);
	}

	if (fd_mesa_debug & FD_DBG_MSGS) {
		debug_printf("binning input: cbuf cpp:");
		for (i = 0; i < pfb->nr_cbufs; i++)
			debug_printf(" %d", cbuf_cpp[i]);
		debug_printf(", zsbuf cpp: %d; %dx%d\n",
				zsbuf_cpp[0], width, height);
	}

	/* then find a bin width/height that satisfies the memory
	 * constraints:
	 */
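	/* (each candidate bin needs roughly (sum(cbuf_cpp[]) + zsbuf_cpp[0] +
	 * zsbuf_cpp[1]) * bin_w * bin_h bytes, plus per-buffer alignment, so
	 * keep subdividing the larger dimension until the bin fits in GMEM)
	 */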
	while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
		if (bin_w > bin_h) {
			nbins_x++;
			bin_w = align(width / nbins_x, gmem_alignw);
		} else {
			nbins_y++;
			bin_h = align(height / nbins_y, gmem_alignh);
		}
	}

	DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h);

	gmem->scissor = *scissor;
	memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp));
	memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp));
	gmem->bin_h = bin_h;
	gmem->bin_w = bin_w;
	gmem->nbins_x = nbins_x;
	gmem->nbins_y = nbins_y;
	gmem->minx = minx;
	gmem->miny = miny;
	gmem->width = width;
	gmem->height = height;

	/*
	 * Assign tiles and pipes:
	 *
	 * At some point it might be worth playing with different
	 * strategies and seeing if that makes much impact on
	 * performance.
	 */

#define div_round_up(v, a)  (((v) + (a) - 1) / (a))
	/* figure out number of tiles per pipe: */
	tpp_x = tpp_y = 1;
	while (div_round_up(nbins_y, tpp_y) > 8)
		tpp_y += 2;
	while ((div_round_up(nbins_y, tpp_y) *
			div_round_up(nbins_x, tpp_x)) > 8)
		tpp_x += 1;
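
	/* (the hw has only a small fixed set of VSC pipes, so the
	 * tiles-per-pipe footprint is grown until at most 8 pipes cover
	 * all of the bins)
	 */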

	/* configure pipes: */
	xoff = yoff = 0;
	for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];

		if (xoff >= nbins_x) {
			xoff = 0;
			yoff += tpp_y;
		}

		if (yoff >= nbins_y) {
			break;
		}

		pipe->x = xoff;
		pipe->y = yoff;
		pipe->w = MIN2(tpp_x, nbins_x - xoff);
		pipe->h = MIN2(tpp_y, nbins_y - yoff);

		xoff += tpp_x;
	}

	for (; i < ARRAY_SIZE(ctx->pipe); i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		pipe->x = pipe->y = pipe->w = pipe->h = 0;
	}

#if 0 /* debug */
	printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y);
	for (i = 0; i < 8; i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		printf("pipe[%d]: %ux%u @ %u,%u\n", i,
				pipe->w, pipe->h, pipe->x, pipe->y);
	}
#endif

	/* configure tiles: */
	t = 0;
	yoff = miny;
	memset(tile_n, 0, sizeof(tile_n));
	for (i = 0; i < nbins_y; i++) {
		uint32_t bw, bh;

		xoff = minx;

		/* clip bin height: */
		bh = MIN2(bin_h, miny + height - yoff);

		for (j = 0; j < nbins_x; j++) {
			struct fd_tile *tile = &ctx->tile[t];
			uint32_t p;

			assert(t < ARRAY_SIZE(ctx->tile));

			/* pipe number: */
			p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);

			/* clip bin width: */
			bw = MIN2(bin_w, minx + width - xoff);

			tile->n = tile_n[p]++;
			tile->p = p;
			tile->bin_w = bw;
			tile->bin_h = bh;
			tile->xoff = xoff;
			tile->yoff = yoff;

			t++;

			xoff += bw;
		}

		yoff += bh;
	}

#if 0 /* debug */
	t = 0;
	for (i = 0; i < nbins_y; i++) {
		for (j = 0; j < nbins_x; j++) {
			struct fd_tile *tile = &ctx->tile[t++];
			printf("|p:%u n:%u|", tile->p, tile->n);
		}
		printf("\n");
	}
#endif
}

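/* GMEM path: for each tile in turn, emit per-tile setup, restore
 * (mem2gmem) if needed, replay the draw commands via an IB into the
 * tile, then resolve the tile back to system memory (gmem2mem):
 */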
static void
render_tiles(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	int i;

	ctx->emit_tile_init(batch);

	if (batch->restore)
		ctx->stats.batch_restore++;

	for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) {
		struct fd_tile *tile = &ctx->tile[i];

		DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
			tile->bin_h, tile->yoff, tile->bin_w, tile->xoff);

		ctx->emit_tile_prep(batch, tile);

		if (batch->restore) {
			ctx->emit_tile_mem2gmem(batch, tile);
		}

		ctx->emit_tile_renderprep(batch, tile);

		fd_hw_query_prepare_tile(batch, i, batch->gmem);

		/* emit IB to drawcmds: */
		ctx->emit_ib(batch->gmem, batch->draw);
		fd_reset_wfi(batch);

		/* emit gmem2mem to transfer tile back to system memory: */
		ctx->emit_tile_gmem2mem(batch, tile);
	}

	if (ctx->emit_tile_fini)
		ctx->emit_tile_fini(batch);
}

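/* Bypass (sysmem) path: draw directly to the system memory buffers,
 * skipping tiling entirely:
 */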
static void
render_sysmem(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;

	ctx->emit_sysmem_prep(batch);

	fd_hw_query_prepare_tile(batch, 0, batch->gmem);

	/* emit IB to drawcmds: */
	ctx->emit_ib(batch->gmem, batch->draw);
	fd_reset_wfi(batch);

	if (ctx->emit_sysmem_fini)
		ctx->emit_sysmem_fini(batch);
}

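/* Flush the batch's gmem ringbuffer to the kernel, and replace the
 * context's last_fence with one created from the resulting timestamp
 * and (optional) out-fence fd:
 */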
static void
flush_ring(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	int out_fence_fd = -1;

	fd_ringbuffer_flush2(batch->gmem, batch->in_fence_fd,
			batch->needs_out_fence_fd ? &out_fence_fd : NULL);

	fd_fence_ref(&ctx->screen->base, &ctx->last_fence, NULL);
	ctx->last_fence = fd_fence_create(ctx,
			fd_ringbuffer_timestamp(batch->gmem), out_fence_fd);
}

void
fd_gmem_render_tiles(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	bool sysmem = false;

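	/* Heuristic: tiled (GMEM) rendering only pays off if something forces
	 * it (clears, or whatever set gmem_reason) or there are enough draws
	 * to amortize the tiling overhead; otherwise render directly to
	 * system memory when the backend supports it:
	 */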
	if (ctx->emit_sysmem_prep) {
		if (batch->cleared || batch->gmem_reason || (batch->num_draws > 5)) {
			DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u",
				batch->cleared, batch->gmem_reason, batch->num_draws);
		} else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) {
			sysmem = true;
		}
	}

	fd_reset_wfi(batch);

	ctx->stats.batch_total++;

	if (sysmem) {
		DBG("%p: rendering sysmem %ux%u (%s/%s)",
			batch, pfb->width, pfb->height,
			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
		fd_hw_query_prepare(batch, 1);
		render_sysmem(batch);
		ctx->stats.batch_sysmem++;
	} else {
		struct fd_gmem_stateobj *gmem = &ctx->gmem;
		calculate_tiles(batch);
		DBG("%p: rendering %dx%d tiles %ux%u (%s/%s)",
			batch, pfb->width, pfb->height, gmem->nbins_x, gmem->nbins_y,
			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
		fd_hw_query_prepare(batch, gmem->nbins_x * gmem->nbins_y);
		render_tiles(batch);
		ctx->stats.batch_gmem++;
	}

	flush_ring(batch);
}

/* special case for when we need to create a fence but have no rendering
 * to flush: just emit a no-op string-marker packet.
 */
void
fd_gmem_render_noop(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct pipe_context *pctx = &ctx->base;

	pctx->emit_string_marker(pctx, "noop", 4);
	/* emit IB to drawcmds (which contain the string marker): */
	ctx->emit_ib(batch->gmem, batch->draw);
	flush_ring(batch);
}

/* restore can be skipped if the tile is completely contained within
 * the cleared scissor:
 */
static bool
skip_restore(struct pipe_scissor_state *scissor, struct fd_tile *tile)
{
	unsigned minx = tile->xoff;
	unsigned maxx = tile->xoff + tile->bin_w;
	unsigned miny = tile->yoff;
	unsigned maxy = tile->yoff + tile->bin_h;
	return (minx >= scissor->minx) && (maxx <= scissor->maxx) &&
			(miny >= scissor->miny) && (maxy <= scissor->maxy);
}

/* When deciding whether a tile needs mem2gmem, we need to take into
 * account the scissor rect(s) that were cleared.  To simplify we only
 * consider the last scissor rect for each buffer, since the common
 * case would be a single clear.
 */
bool
fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile,
		uint32_t buffers)
{
	if (!(batch->restore & buffers))
		return false;

	/* if the buffers were only partially cleared, take the slow path to
	 * figure out whether this particular tile needs restoring:
	 */
	if ((buffers & FD_BUFFER_COLOR) &&
			(batch->partial_cleared & FD_BUFFER_COLOR) &&
			skip_restore(&batch->cleared_scissor.color, tile))
		return false;
	if ((buffers & FD_BUFFER_DEPTH) &&
			(batch->partial_cleared & FD_BUFFER_DEPTH) &&
			skip_restore(&batch->cleared_scissor.depth, tile))
		return false;
	if ((buffers & FD_BUFFER_STENCIL) &&
			(batch->partial_cleared & FD_BUFFER_STENCIL) &&
			skip_restore(&batch->cleared_scissor.stencil, tile))
		return false;

	return true;
}
    482