Home | History | Annotate | Download | only in a4xx
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
      2 
      3 /*
      4  * Copyright (C) 2014 Rob Clark <robclark (at) freedesktop.org>
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     23  * SOFTWARE.
     24  *
     25  * Authors:
     26  *    Rob Clark <robclark (at) freedesktop.org>
     27  */
     28 
     29 #include "pipe/p_state.h"
     30 #include "util/u_string.h"
     31 #include "util/u_memory.h"
     32 #include "util/u_inlines.h"
     33 #include "util/u_format.h"
     34 
     35 #include "freedreno_draw.h"
     36 #include "freedreno_state.h"
     37 #include "freedreno_resource.h"
     38 
     39 #include "fd4_gmem.h"
     40 #include "fd4_context.h"
     41 #include "fd4_draw.h"
     42 #include "fd4_emit.h"
     43 #include "fd4_program.h"
     44 #include "fd4_format.h"
     45 #include "fd4_zsa.h"
     46 
/*
 * Emit the RB_MRT[i] buffer-info registers (BUF_INFO, base address and
 * CONTROL3) for every one of the A4XX_MAX_RENDER_TARGETS slots.
 *
 *   ring        - ringbuffer the register writes are appended to
 *   nr_bufs     - number of valid entries in bufs
 *   bufs        - color surfaces; individual entries may be NULL
 *   bases       - per-target base values, used as the base dword when
 *                 bin_w is non-zero (or the slot has no surface); may
 *                 be NULL, in which case 0 is used
 *   bin_w       - bin width.  Non-zero selects tile mode 2 and a pitch of
 *                 bin_w * cpp; zero selects TILE4_LINEAR, the slice's own
 *                 pitch and a relocation to the surface's buffer object
 *   decode_srgb - when true, sRGB formats get the COLOR_SRGB flag
 *
 * Slots without a surface are still written, with format 0 and stride 0.
 */
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
		struct pipe_surface **bufs, uint32_t *bases,
		uint32_t bin_w, bool decode_srgb)
{
	enum a4xx_tile_mode tile_mode;
	unsigned i;

	/* NOTE(review): 2 is a raw a4xx_tile_mode value with no named
	 * constant visible in this file - confirm its meaning.
	 */
	if (bin_w) {
		tile_mode = 2;
	} else {
		tile_mode = TILE4_LINEAR;
	}

	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
		/* defaults describe an unused slot */
		enum a4xx_color_fmt format = 0;
		enum a3xx_color_swap swap = WZYX;
		bool srgb = false;
		struct fd_resource *rsc = NULL;
		struct fd_resource_slice *slice = NULL;
		uint32_t stride = 0;
		uint32_t base = 0;
		uint32_t offset = 0;

		if ((i < nr_bufs) && bufs[i]) {
			struct pipe_surface *psurf = bufs[i];
			enum pipe_format pformat = psurf->format;

			rsc = fd_resource(psurf->texture);

			/* In case we're drawing to Z32F_S8, the "color" actually goes to
			 * the stencil
			 */
			if (rsc->stencil) {
				rsc = rsc->stencil;
				pformat = rsc->base.b.format;
				/* Step past the first entry so that bases[i] below picks
				 * up the following one.  The pointer itself is advanced,
				 * so the shift also applies to later iterations.
				 */
				if (bases)
					bases++;
			}

			slice = fd_resource_slice(rsc, psurf->u.tex.level);
			format = fd4_pipe2color(pformat);
			swap = fd4_pipe2swap(pformat);

			/* NOTE(review): format and swap were already derived from
			 * pformat above and pformat is not read again in this
			 * iteration, so the else-branch assignment currently has no
			 * effect - confirm whether the linear format was meant to
			 * feed fd4_pipe2color().
			 */
			if (decode_srgb)
				srgb = util_format_is_srgb(pformat);
			else
				pformat = util_format_linear(pformat);

			/* only single-layer surfaces are handled */
			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

			offset = fd_resource_offset(rsc, psurf->u.tex.level,
					psurf->u.tex.first_layer);

			if (bin_w) {
				stride = bin_w * rsc->cpp;

				if (bases) {
					base = bases[i];
				}
			} else {
				stride = slice->pitch * rsc->cpp;
			}
		} else if ((i < nr_bufs) && bases) {
			/* no surface in this slot, but a base was supplied for it */
			base = bases[i];
		}

		/* three dwords: BUF_INFO, base address, CONTROL3 */
		OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
		OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
				A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
				A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
				A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
				COND(srgb, A4XX_RB_MRT_BUF_INFO_COLOR_SRGB));
		if (bin_w || (i >= nr_bufs) || !bufs[i]) {
			/* binned rendering or unused slot: plain base value */
			OUT_RING(ring, base);
			OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
		} else {
			/* bin_w == 0 with a surface: point at the buffer object */
			OUT_RELOCW(ring, rsc->bo, offset, 0, 0);
			/* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d..
			 * not sure if we need to skip it for bypass or
			 * not.
			 */
			OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0));
		}
	}
}
    133 
    134 static bool
    135 use_hw_binning(struct fd_batch *batch)
    136 {
    137 	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
    138 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    139 
    140 	/* this seems to be a hw bug.. but this hack fixes piglit fbo-maxsize: */
    141 	if ((pfb->width > 4096) && (pfb->height > 4096))
    142 		return false;
    143 
    144 	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
    145 }
    146 
    147 /* transfer from gmem to system memory (ie. normal RAM) */
    148 
    149 static void
    150 emit_gmem2mem_surf(struct fd_batch *batch, bool stencil,
    151 		uint32_t base, struct pipe_surface *psurf)
    152 {
    153 	struct fd_ringbuffer *ring = batch->gmem;
    154 	struct fd_resource *rsc = fd_resource(psurf->texture);
    155 	enum pipe_format pformat = psurf->format;
    156 	struct fd_resource_slice *slice;
    157 	uint32_t offset;
    158 
    159 	if (stencil) {
    160 		debug_assert(rsc->stencil);
    161 		rsc = rsc->stencil;
    162 		pformat = rsc->base.b.format;
    163 	}
    164 
    165 	slice = &rsc->slices[psurf->u.tex.level];
    166 	offset = fd_resource_offset(rsc, psurf->u.tex.level,
    167 			psurf->u.tex.first_layer);
    168 
    169 	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
    170 
    171 	OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
    172 	OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
    173 			A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
    174 			A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
    175 	OUT_RELOCW(ring, rsc->bo, offset, 0, 0);   /* RB_COPY_DEST_BASE */
    176 	OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
    177 	OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
    178 			A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(pformat)) |
    179 			A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
    180 			A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
    181 			A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat)));
    182 
    183 	fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
    184 			DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
    185 }
    186 
/*
 * Resolve the contents of gmem back to system memory for the buffers
 * selected in batch->resolve.
 *
 * Programs the state for a resolve pass using the context's solid
 * program and vertex buffer, emits one emit_gmem2mem_surf() per buffer
 * to resolve, and finally switches GRAS_SC_CONTROL back to
 * RB_RENDERING_PASS.
 *
 * The tile argument is not referenced in this function.
 */
static void
fd4_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct fd4_emit emit = {
			.debug = &ctx->debug,
			.vtx = &ctx->solid_vbuf_state,
			.prog = &ctx->solid_prog,
			.key = {
				.half_precision = true,
			},
	};

	/* depth and stencil functions are both set to FUNC_NEVER for the
	 * resolve pass
	 */
	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));

	OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
	OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
			A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */

	/* front and back-face ref/mask; NOTE(review): the meaning of the
	 * 0xff000000 bits is not named here - confirm against the register
	 * database.
	 */
	OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
	OUT_RING(ring, 0xff000000 |
			A4XX_RB_STENCILREFMASK_STENCILREF(0) |
			A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
			A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
	OUT_RING(ring, 0xff000000 |
			A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
			A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
			A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));

	OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));

	fd_wfi(batch, ring);

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
	OUT_RING(ring, 0x80000);      /* GRAS_CL_CLIP_CNTL */

	/* viewport spanning the whole framebuffer: offset and scale are half
	 * the width/height, with the Y scale negated
	 */
	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
			0xa);       /* XXX */

	/* enter the resolve pass */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);

	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
	OUT_RING(ring, 0x00000002);

	/* window scissor covering the whole framebuffer (BR first, then TL) */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	fd4_program_emit(ring, &emit, 0, NULL);
	fd4_emit_vertex_bufs(ring, &emit);

	/* Depth/stencil resolve.  With a separate stencil resource the two
	 * are resolved individually according to the resolve bits; without
	 * one, a single resolve of the zsbuf is done.
	 * NOTE(review): pfb->zsbuf is dereferenced without a NULL check -
	 * presumably the resolve bits are only set when a zsbuf is bound.
	 */
	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
		if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
			emit_gmem2mem_surf(batch, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf);
		if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
			emit_gmem2mem_surf(batch, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf);
	}

	/* color resolve: one per bound color buffer whose per-buffer bit
	 * (PIPE_CLEAR_COLOR0 << i) is set in batch->resolve
	 */
	if (batch->resolve & FD_BUFFER_COLOR) {
		unsigned i;
		for (i = 0; i < pfb->nr_cbufs; i++) {
			if (!pfb->cbufs[i])
				continue;
			if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
				continue;
			emit_gmem2mem_surf(batch, false, gmem->cbuf_base[i], pfb->cbufs[i]);
		}
	}

	/* leave the resolve pass again */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}
    297 
    298 /* transfer from system memory to gmem */
    299 
    300 static void
    301 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t *bases,
    302 		struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w)
    303 {
    304 	struct fd_ringbuffer *ring = batch->gmem;
    305 	struct pipe_surface *zsbufs[2];
    306 
    307 	emit_mrt(ring, nr_bufs, bufs, bases, bin_w, false);
    308 
    309 	if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
    310 		/* The gmem_restore_tex logic will put the first buffer's stencil
    311 		 * as color. Supply it with the proper information to make that
    312 		 * happen.
    313 		 */
    314 		zsbufs[0] = zsbufs[1] = bufs[0];
    315 		bufs = zsbufs;
    316 		nr_bufs = 2;
    317 	}
    318 
    319 	fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs);
    320 
    321 	fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
    322 			DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
    323 }
    324 
/*
 * Restore the contents of a tile from system memory into gmem.
 *
 * Writes the tile's normalized texture coordinates into the blit
 * texcoord vertex buffer, programs blend/depth/stencil/viewport/scissor
 * state for a blit covering the tile, and then restores the color
 * buffers and/or the depth/stencil buffer for which
 * fd_gmem_needs_restore() reports a restore is required.
 */
static void
fd4_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct fd4_emit emit = {
			.debug = &ctx->debug,
			.vtx = &ctx->blit_vbuf_state,
			.sprite_coord_enable = 1,
			/* NOTE: They all use the same VP, this is for vtx bufs. */
			.prog = &ctx->blit_prog[0],
			.key = {
				.half_precision = fd_half_precision(pfb),
			},
			.no_decode_srgb = true,
	};
	/* per-render-target component enables for RB_RENDER_COMPONENTS */
	unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
	float x0, y0, x1, y1;
	/* (possibly truncated) size of this particular tile */
	unsigned bin_w = tile->bin_w;
	unsigned bin_h = tile->bin_h;
	unsigned i;

	/* write texture coordinates to vertexbuf: */
	/* tile rectangle expressed as fractions of the framebuffer size */
	x0 = ((float)tile->xoff) / ((float)pfb->width);
	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
	y0 = ((float)tile->yoff) / ((float)pfb->height);
	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);

	/* five dwords: destination address followed by the four floats */
	OUT_PKT3(ring, CP_MEM_WRITE, 5);
	OUT_RELOCW(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
	OUT_RING(ring, fui(x0));
	OUT_RING(ring, fui(y0));
	OUT_RING(ring, fui(x1));
	OUT_RING(ring, fui(y1));

	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
		/* all four components for bound color buffers, none otherwise */
		mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;

		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));

		/* blend factors ONE (src) / ZERO (dst) for both RGB and alpha */
		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
		OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
	}

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
			A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
			A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
			A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
			A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
			A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
			A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
			A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
	OUT_RING(ring, 0x8);          /* XXX RB_RENDER_CONTROL */

	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));

	OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
	OUT_RING(ring, 0x280000);     /* XXX GRAS_CL_CLIP_CNTL */

	OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
			A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);

	/* viewport sized to this tile, Y scale negated */
	OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h/2.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
	OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));

	/* window and screen scissors both cover (0,0)..(bin_w-1,bin_h-1) */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
	OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));

	/* note: uses the full gmem bin size, not the truncated tile size */
	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));

	OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
	OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
			A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
			A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
	OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */

	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
			A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));

	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */

	fd4_emit_vertex_bufs(ring, &emit);

	/* for gmem pitch/base calculations, we need to use the non-
	 * truncated tile sizes:
	 */
	bin_w = gmem->bin_w;
	/* bin_h is not read again after this assignment */
	bin_h = gmem->bin_h;

	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
		/* NOTE(review): indexes blit_prog[] with nr_cbufs - 1, which
		 * presumes at least one color buffer whenever a color restore
		 * is requested - confirm.
		 */
		emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
		emit.fp = NULL;      /* frag shader changed so clear cache */
		fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
		emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
	}

	/* NOTE(review): pfb->zsbuf is dereferenced without a NULL check -
	 * presumably a depth/stencil restore is only reported when one is
	 * bound.
	 */
	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		switch (pfb->zsbuf->format) {
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT:
			/* float depth: dedicated blit_z / blit_zs program with depth
			 * writes enabled and full precision
			 */
			emit.prog = (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) ?
					&ctx->blit_z : &ctx->blit_zs;
			emit.key.half_precision = false;

			OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
			OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
					A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
					A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
					A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);

			OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
			OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);

			OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
			OUT_RING(ring, 0x80000);   /* GRAS_CL_CLIP_CNTL */

			break;
		default:
			/* Non-float can use a regular color write. It's split over 8-bit
			 * components, so half precision is always sufficient.
			 */
			emit.prog = &ctx->blit_prog[0];
			emit.key.half_precision = true;
			break;
		}
		emit.fp = NULL;      /* frag shader changed so clear cache */
		fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
		emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
	}

	/* restore pass done: raster mode back to 0 */
	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));

	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
			0x00010000);  /* XXX */
}
    509 
    510 static void
    511 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
    512 {
    513 	unsigned i;
    514 	for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
    515 		struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
    516 		*patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
    517 	}
    518 	util_dynarray_resize(&batch->draw_patches, 0);
    519 }
    520 
    521 /* for rendering directly to system memory: */
    522 static void
    523 fd4_emit_sysmem_prep(struct fd_batch *batch)
    524 {
    525 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    526 	struct fd_ringbuffer *ring = batch->gmem;
    527 
    528 	fd4_emit_restore(batch, ring);
    529 
    530 	OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
    531 	OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
    532 			A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
    533 
    534 	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
    535 
    536 	/* setup scissor/offset for current tile: */
    537 	OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
    538 	OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(0) |
    539 			A4XX_RB_BIN_OFFSET_Y(0));
    540 
    541 	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
    542 	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
    543 			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
    544 	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
    545 			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
    546 
    547 	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
    548 	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(0) |
    549 			A4XX_RB_MODE_CONTROL_HEIGHT(0) |
    550 			0x00c00000);  /* XXX */
    551 
    552 	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
    553 	OUT_RING(ring, 0x8);
    554 
    555 	patch_draws(batch, IGNORE_VISIBILITY);
    556 }
    557 
    558 static void
    559 update_vsc_pipe(struct fd_batch *batch)
    560 {
    561 	struct fd_context *ctx = batch->ctx;
    562 	struct fd4_context *fd4_ctx = fd4_context(ctx);
    563 	struct fd_ringbuffer *ring = batch->gmem;
    564 	int i;
    565 
    566 	OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
    567 	OUT_RELOCW(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
    568 
    569 	OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
    570 	for (i = 0; i < 8; i++) {
    571 		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
    572 		OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
    573 				A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
    574 				A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
    575 				A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
    576 	}
    577 
    578 	OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
    579 	for (i = 0; i < 8; i++) {
    580 		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
    581 		if (!pipe->bo) {
    582 			pipe->bo = fd_bo_new(ctx->dev, 0x40000,
    583 					DRM_FREEDRENO_GEM_TYPE_KMEM);
    584 		}
    585 		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);       /* VSC_PIPE_DATA_ADDRESS[i] */
    586 	}
    587 
    588 	OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
    589 	for (i = 0; i < 8; i++) {
    590 		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
    591 		OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
    592 	}
    593 }
    594 
    595 static void
    596 emit_binning_pass(struct fd_batch *batch)
    597 {
    598 	struct fd_context *ctx = batch->ctx;
    599 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
    600 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    601 	struct fd_ringbuffer *ring = batch->gmem;
    602 	int i;
    603 
    604 	uint32_t x1 = gmem->minx;
    605 	uint32_t y1 = gmem->miny;
    606 	uint32_t x2 = gmem->minx + gmem->width - 1;
    607 	uint32_t y2 = gmem->miny + gmem->height - 1;
    608 
    609 	OUT_PKT0(ring, REG_A4XX_PC_BINNING_COMMAND, 1);
    610 	OUT_RING(ring, A4XX_PC_BINNING_COMMAND_BINNING_ENABLE);
    611 
    612 	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
    613 	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
    614 			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
    615 			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    616 			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
    617 
    618 	OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
    619 	OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
    620 			A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
    621 
    622 	/* setup scissor/offset for whole screen: */
    623 	OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
    624 	OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(x1) |
    625 			A4XX_RB_BIN_OFFSET_Y(y1));
    626 
    627 	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
    628 	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
    629 			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
    630 	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
    631 			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
    632 
    633 	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
    634 		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
    635 		OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
    636 				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
    637 	}
    638 
    639 	/* emit IB to binning drawcmds: */
    640 	ctx->emit_ib(ring, batch->binning);
    641 
    642 	fd_reset_wfi(batch);
    643 	fd_wfi(batch, ring);
    644 
    645 	/* and then put stuff back the way it was: */
    646 
    647 	OUT_PKT0(ring, REG_A4XX_PC_BINNING_COMMAND, 1);
    648 	OUT_RING(ring, 0x00000000);
    649 
    650 	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
    651 	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    652 			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
    653 			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    654 			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
    655 
    656 	fd_event_write(batch, ring, CACHE_FLUSH);
    657 	fd_wfi(batch, ring);
    658 }
    659 
    660 /* before first tile */
    661 static void
    662 fd4_emit_tile_init(struct fd_batch *batch)
    663 {
    664 	struct fd_ringbuffer *ring = batch->gmem;
    665 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    666 	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
    667 
    668 	fd4_emit_restore(batch, ring);
    669 
    670 	OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
    671 	OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
    672 			A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
    673 
    674 	update_vsc_pipe(batch);
    675 
    676 	fd_wfi(batch, ring);
    677 	OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
    678 	OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
    679 			A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
    680 
    681 	if (use_hw_binning(batch)) {
    682 		OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
    683 		OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
    684 				A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));
    685 
    686 		OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
    687 		OUT_RING(ring, A4XX_RB_RENDER_CONTROL_BINNING_PASS |
    688 				A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
    689 				0x8);
    690 
    691 		/* emit hw binning pass: */
    692 		emit_binning_pass(batch);
    693 
    694 		patch_draws(batch, USE_VISIBILITY);
    695 	} else {
    696 		patch_draws(batch, IGNORE_VISIBILITY);
    697 	}
    698 
    699 	OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
    700 	OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
    701 			A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
    702 			A4XX_RB_MODE_CONTROL_ENABLE_GMEM);
    703 }
    704 
    705 /* before mem2gmem */
    706 static void
    707 fd4_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
    708 {
    709 	struct fd_context *ctx = batch->ctx;
    710 	struct fd_ringbuffer *ring = batch->gmem;
    711 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    712 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
    713 
    714 	if (pfb->zsbuf) {
    715 		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
    716 		uint32_t cpp = rsc->cpp;
    717 
    718 		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
    719 		OUT_RING(ring, A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]) |
    720 				A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format)));
    721 		OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
    722 		OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
    723 
    724 		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
    725 		if (rsc->stencil) {
    726 			OUT_RING(ring, A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL |
    727 					A4XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
    728 			OUT_RING(ring, A4XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
    729 		} else {
    730 			OUT_RING(ring, 0x00000000);
    731 			OUT_RING(ring, 0x00000000);
    732 		}
    733 	} else {
    734 		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
    735 		OUT_RING(ring, 0x00000000);
    736 		OUT_RING(ring, 0x00000000);
    737 		OUT_RING(ring, 0x00000000);
    738 
    739 		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
    740 		OUT_RING(ring, 0);            /* RB_STENCIL_INFO */
    741 		OUT_RING(ring, 0);            /* RB_STENCIL_PITCH */
    742 	}
    743 
    744 	OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
    745 	if (pfb->zsbuf) {
    746 		OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
    747 				fd4_pipe2depth(pfb->zsbuf->format)));
    748 	} else {
    749 		OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE));
    750 	}
    751 }
    752 
    753 /* before IB to rendering cmds: */
    754 static void
    755 fd4_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
    756 {
    757 	struct fd_context *ctx = batch->ctx;
    758 	struct fd4_context *fd4_ctx = fd4_context(ctx);
    759 	struct fd_ringbuffer *ring = batch->gmem;
    760 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
    761 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    762 
    763 	uint32_t x1 = tile->xoff;
    764 	uint32_t y1 = tile->yoff;
    765 	uint32_t x2 = tile->xoff + tile->bin_w - 1;
    766 	uint32_t y2 = tile->yoff + tile->bin_h - 1;
    767 
    768 	if (use_hw_binning(batch)) {
    769 		struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
    770 
    771 		assert(pipe->w * pipe->h);
    772 
    773 		fd_event_write(batch, ring, HLSQ_FLUSH);
    774 		fd_wfi(batch, ring);
    775 
    776 		OUT_PKT0(ring, REG_A4XX_PC_VSTREAM_CONTROL, 1);
    777 		OUT_RING(ring, A4XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
    778 				A4XX_PC_VSTREAM_CONTROL_N(tile->n));
    779 
    780 		OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
    781 		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);    /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
    782 		OUT_RELOCW(ring, fd4_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
    783 				(tile->p * 4), 0, 0);
    784 	} else {
    785 		OUT_PKT0(ring, REG_A4XX_PC_VSTREAM_CONTROL, 1);
    786 		OUT_RING(ring, 0x00000000);
    787 	}
    788 
    789 	OUT_PKT3(ring, CP_SET_BIN, 3);
    790 	OUT_RING(ring, 0x00000000);
    791 	OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
    792 	OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
    793 
    794 	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true);
    795 
    796 	/* setup scissor/offset for current tile: */
    797 	OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
    798 	OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(tile->xoff) |
    799 			A4XX_RB_BIN_OFFSET_Y(tile->yoff));
    800 
    801 	OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
    802 	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
    803 			A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
    804 	OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
    805 			A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
    806 
    807 	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
    808 	OUT_RING(ring, 0x8);
    809 }
    810 
    811 void
    812 fd4_gmem_init(struct pipe_context *pctx)
    813 {
    814 	struct fd_context *ctx = fd_context(pctx);
    815 
    816 	ctx->emit_sysmem_prep = fd4_emit_sysmem_prep;
    817 	ctx->emit_tile_init = fd4_emit_tile_init;
    818 	ctx->emit_tile_prep = fd4_emit_tile_prep;
    819 	ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
    820 	ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
    821 	ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
    822 }
    823