Home | History | Annotate | Download | only in a3xx
      1 /* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
      2 
      3 /*
      4  * Copyright (C) 2013 Rob Clark <robclark (at) freedesktop.org>
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     23  * SOFTWARE.
     24  *
     25  * Authors:
     26  *    Rob Clark <robclark (at) freedesktop.org>
     27  */
     28 
     29 #include "pipe/p_state.h"
     30 #include "util/u_string.h"
     31 #include "util/u_memory.h"
     32 #include "util/u_inlines.h"
     33 #include "util/u_format.h"
     34 
     35 #include "freedreno_draw.h"
     36 #include "freedreno_state.h"
     37 #include "freedreno_resource.h"
     38 
     39 #include "fd3_gmem.h"
     40 #include "fd3_context.h"
     41 #include "fd3_emit.h"
     42 #include "fd3_program.h"
     43 #include "fd3_format.h"
     44 #include "fd3_zsa.h"
     45 
     46 static void
     47 emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
     48 		 struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w,
     49 		 bool decode_srgb)
     50 {
     51 	enum a3xx_tile_mode tile_mode;
     52 	unsigned i;
     53 
     54 	if (bin_w) {
     55 		tile_mode = TILE_32X32;
     56 	} else {
     57 		tile_mode = LINEAR;
     58 	}
     59 
     60 	for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
     61 		enum pipe_format pformat = 0;
     62 		enum a3xx_color_fmt format = 0;
     63 		enum a3xx_color_swap swap = WZYX;
     64 		bool srgb = false;
     65 		struct fd_resource *rsc = NULL;
     66 		struct fd_resource_slice *slice = NULL;
     67 		uint32_t stride = 0;
     68 		uint32_t base = 0;
     69 		uint32_t offset = 0;
     70 
     71 		if ((i < nr_bufs) && bufs[i]) {
     72 			struct pipe_surface *psurf = bufs[i];
     73 
     74 			rsc = fd_resource(psurf->texture);
     75 			pformat = psurf->format;
     76 			/* In case we're drawing to Z32F_S8, the "color" actually goes to
     77 			 * the stencil
     78 			 */
     79 			if (rsc->stencil) {
     80 				rsc = rsc->stencil;
     81 				pformat = rsc->base.b.format;
     82 				if (bases)
     83 					bases++;
     84 			}
     85 			slice = fd_resource_slice(rsc, psurf->u.tex.level);
     86 			format = fd3_pipe2color(pformat);
     87 			swap = fd3_pipe2swap(pformat);
     88 			if (decode_srgb)
     89 				srgb = util_format_is_srgb(pformat);
     90 			else
     91 				pformat = util_format_linear(pformat);
     92 
     93 			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
     94 
     95 			offset = fd_resource_offset(rsc, psurf->u.tex.level,
     96 					psurf->u.tex.first_layer);
     97 
     98 			if (bin_w) {
     99 				stride = bin_w * rsc->cpp;
    100 
    101 				if (bases) {
    102 					base = bases[i];
    103 				}
    104 			} else {
    105 				stride = slice->pitch * rsc->cpp;
    106 			}
    107 		} else if (i < nr_bufs && bases) {
    108 			base = bases[i];
    109 		}
    110 
    111 		OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
    112 		OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
    113 				A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
    114 				A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
    115 				A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
    116 				COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));
    117 		if (bin_w || (i >= nr_bufs) || !bufs[i]) {
    118 			OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
    119 		} else {
    120 			OUT_RELOCW(ring, rsc->bo, offset, 0, -1);
    121 		}
    122 
    123 		OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
    124 		OUT_RING(ring, COND((i < nr_bufs) && bufs[i],
    125 							A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(
    126 									fd3_fs_output_format(pformat))));
    127 	}
    128 }
    129 
    130 static bool
    131 use_hw_binning(struct fd_batch *batch)
    132 {
    133 	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
    134 
    135 	/* workaround: combining scissor optimization and hw binning
    136 	 * seems problematic.  Seems like we end up with a mismatch
    137 	 * between binning pass and rendering pass, wrt. where the hw
    138 	 * thinks the vertices belong.  And the blob driver doesn't
    139 	 * seem to implement anything like scissor optimization, so
    140 	 * not entirely sure what I might be missing.
    141 	 *
    142 	 * But scissor optimization is mainly for window managers,
    143 	 * which don't have many vertices (and therefore doesn't
    144 	 * benefit much from binning pass).
    145 	 *
    146 	 * So for now just disable binning if scissor optimization is
    147 	 * used.
    148 	 */
    149 	if (gmem->minx || gmem->miny)
    150 		return false;
    151 
    152 	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
    153 }
    154 
    155 /* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */
    156 static void update_vsc_pipe(struct fd_batch *batch);
    157 static void
    158 emit_binning_workaround(struct fd_batch *batch)
    159 {
    160 	struct fd_context *ctx = batch->ctx;
    161 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
    162 	struct fd_ringbuffer *ring = batch->gmem;
    163 	struct fd3_emit emit = {
    164 			.debug = &ctx->debug,
    165 			.vtx = &ctx->solid_vbuf_state,
    166 			.prog = &ctx->solid_prog,
    167 			.key = {
    168 				.half_precision = true,
    169 			},
    170 	};
    171 
    172 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
    173 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
    174 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
    175 			A3XX_RB_MODE_CONTROL_MRT(0));
    176 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
    177 			A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
    178 			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
    179 
    180 	OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
    181 	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
    182 			A3XX_RB_COPY_CONTROL_MODE(0) |
    183 			A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
    184 	OUT_RELOCW(ring, fd_resource(ctx->solid_vbuf)->bo, 0x20, 0, -1);  /* RB_COPY_DEST_BASE */
    185 	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
    186 	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
    187 			A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
    188 			A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
    189 			A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
    190 			A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));
    191 
    192 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
    193 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
    194 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    195 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
    196 
    197 	fd3_program_emit(ring, &emit, 0, NULL);
    198 	fd3_emit_vertex_bufs(ring, &emit);
    199 
    200 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
    201 	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
    202 			A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
    203 			A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |
    204 			A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
    205 	OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
    206 			A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
    207 	OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
    208 	OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */
    209 
    210 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
    211 	OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |
    212 			A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));
    213 
    214 	OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
    215 	OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
    216 			A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
    217 			A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
    218 
    219 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
    220 	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
    221 
    222 	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
    223 	OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
    224 			A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
    225 			A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
    226 			A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
    227 			A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
    228 			A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
    229 			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
    230 			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
    231 
    232 	OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
    233 	OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));
    234 
    235 	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
    236 	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
    237 	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
    238 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
    239 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
    240 
    241 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
    242 	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
    243 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
    244 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
    245 			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
    246 
    247 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
    248 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
    249 			A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));
    250 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |
    251 			A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));
    252 
    253 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
    254 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
    255 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
    256 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
    257 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
    258 
    259 	fd_wfi(batch, ring);
    260 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
    261 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
    262 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
    263 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));
    264 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));
    265 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
    266 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
    267 
    268 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
    269 	OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |
    270 			A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |
    271 			A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |
    272 			A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |
    273 			A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);
    274 
    275 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
    276 	OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
    277 			A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
    278 
    279 	OUT_PKT3(ring, CP_DRAW_INDX_2, 5);
    280 	OUT_RING(ring, 0x00000000);   /* viz query info. */
    281 	OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE,
    282 						INDEX_SIZE_32_BIT, IGNORE_VISIBILITY, 0));
    283 	OUT_RING(ring, 2);            /* NumIndices */
    284 	OUT_RING(ring, 2);
    285 	OUT_RING(ring, 1);
    286 	fd_reset_wfi(batch);
    287 
    288 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
    289 	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
    290 
    291 	OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
    292 	OUT_RING(ring, 0x00000000);
    293 
    294 	fd_wfi(batch, ring);
    295 	OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
    296 	OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
    297 			A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
    298 
    299 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
    300 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    301 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    302 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
    303 
    304 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
    305 	OUT_RING(ring, 0x00000000);
    306 }
    307 
    308 /* transfer from gmem to system memory (ie. normal RAM) */
    309 
    310 static void
    311 emit_gmem2mem_surf(struct fd_batch *batch,
    312 				   enum adreno_rb_copy_control_mode mode,
    313 				   bool stencil,
    314 				   uint32_t base, struct pipe_surface *psurf)
    315 {
    316 	struct fd_ringbuffer *ring = batch->gmem;
    317 	struct fd_resource *rsc = fd_resource(psurf->texture);
    318 	enum pipe_format format = psurf->format;
    319 	if (stencil) {
    320 		rsc = rsc->stencil;
    321 		format = rsc->base.b.format;
    322 	}
    323 	struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level);
    324 	uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
    325 			psurf->u.tex.first_layer);
    326 
    327 	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
    328 
    329 	OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
    330 	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
    331 			A3XX_RB_COPY_CONTROL_MODE(mode) |
    332 			A3XX_RB_COPY_CONTROL_GMEM_BASE(base) |
    333 			COND(format == PIPE_FORMAT_Z32_FLOAT ||
    334 				 format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
    335 				 A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE));
    336 
    337 	OUT_RELOCW(ring, rsc->bo, offset, 0, -1);    /* RB_COPY_DEST_BASE */
    338 	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
    339 	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
    340 			A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) |
    341 			A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
    342 			A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
    343 			A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format)));
    344 
    345 	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
    346 			DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
    347 }
    348 
    349 static void
    350 fd3_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
    351 {
    352 	struct fd_context *ctx = batch->ctx;
    353 	struct fd_ringbuffer *ring = batch->gmem;
    354 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    355 	struct fd3_emit emit = {
    356 			.debug = &ctx->debug,
    357 			.vtx = &ctx->solid_vbuf_state,
    358 			.prog = &ctx->solid_prog,
    359 			.key = {
    360 				.half_precision = true,
    361 			},
    362 	};
    363 	int i;
    364 
    365 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
    366 	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
    367 
    368 	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
    369 	OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
    370 			A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
    371 			A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
    372 			A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
    373 			A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
    374 			A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
    375 			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
    376 			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
    377 
    378 	OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
    379 	OUT_RING(ring, 0xff000000 |
    380 			A3XX_RB_STENCILREFMASK_STENCILREF(0) |
    381 			A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
    382 			A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
    383 	OUT_RING(ring, 0xff000000 |
    384 			A3XX_RB_STENCILREFMASK_STENCILREF(0) |
    385 			A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
    386 			A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
    387 
    388 	OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
    389 	OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
    390 
    391 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
    392 	OUT_RING(ring, 0x00000000);   /* GRAS_CL_CLIP_CNTL */
    393 
    394 	fd_wfi(batch, ring);
    395 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
    396 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
    397 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
    398 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5));
    399 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0));
    400 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
    401 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
    402 
    403 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
    404 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
    405 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
    406 			A3XX_RB_MODE_CONTROL_MRT(0));
    407 
    408 	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
    409 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
    410 			A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
    411 			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
    412 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
    413 
    414 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
    415 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
    416 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    417 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
    418 
    419 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
    420 	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
    421 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
    422 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
    423 			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
    424 
    425 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
    426 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
    427 			A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
    428 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
    429 			A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
    430 
    431 	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
    432 	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
    433 	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
    434 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
    435 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
    436 
    437 	fd3_program_emit(ring, &emit, 0, NULL);
    438 	fd3_emit_vertex_bufs(ring, &emit);
    439 
    440 	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
    441 		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
    442 		if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH)
    443 			emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false,
    444 							   ctx->gmem.zsbuf_base[0], pfb->zsbuf);
    445 		if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL)
    446 			emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true,
    447 							   ctx->gmem.zsbuf_base[1], pfb->zsbuf);
    448 	}
    449 
    450 	if (batch->resolve & FD_BUFFER_COLOR) {
    451 		for (i = 0; i < pfb->nr_cbufs; i++) {
    452 			if (!pfb->cbufs[i])
    453 				continue;
    454 			if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
    455 				continue;
    456 			emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false,
    457 							   ctx->gmem.cbuf_base[i], pfb->cbufs[i]);
    458 		}
    459 	}
    460 
    461 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
    462 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    463 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
    464 			A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
    465 
    466 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
    467 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    468 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    469 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
    470 }
    471 
    472 /* transfer from system memory to gmem */
    473 
    474 static void
    475 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t bases[],
    476 		struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
    477 {
    478 	struct fd_ringbuffer *ring = batch->gmem;
    479 	struct pipe_surface *zsbufs[2];
    480 
    481 	assert(bufs > 0);
    482 
    483 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
    484 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    485 				   A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
    486 				   A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
    487 
    488 	emit_mrt(ring, bufs, psurf, bases, bin_w, false);
    489 
    490 	if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT ||
    491 					 psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
    492 		/* Depth is stored as unorm in gmem, so we have to write it in using a
    493 		 * special blit shader which writes depth.
    494 		 */
    495 		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
    496 		OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z |
    497 						A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
    498 						A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
    499 						A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE |
    500 						A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)));
    501 
    502 		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
    503 		OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) |
    504 				 A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32));
    505 		OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->ctx->gmem.bin_w));
    506 
    507 		if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {
    508 			OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);
    509 			OUT_RING(ring, 0);
    510 		} else {
    511 			/* The gmem_restore_tex logic will put the first buffer's stencil
    512 			 * as color. Supply it with the proper information to make that
    513 			 * happen.
    514 			 */
    515 			zsbufs[0] = zsbufs[1] = psurf[0];
    516 			psurf = zsbufs;
    517 			bufs = 2;
    518 		}
    519 	} else {
    520 		OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
    521 		OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
    522 	}
    523 
    524 	fd3_emit_gmem_restore_tex(ring, psurf, bufs);
    525 
    526 	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
    527 			DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
    528 }
    529 
    530 static void
    531 fd3_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
    532 {
    533 	struct fd_context *ctx = batch->ctx;
    534 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
    535 	struct fd_ringbuffer *ring = batch->gmem;
    536 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    537 	struct fd3_emit emit = {
    538 			.debug = &ctx->debug,
    539 			.vtx = &ctx->blit_vbuf_state,
    540 			.sprite_coord_enable = 1,
    541 			/* NOTE: They all use the same VP, this is for vtx bufs. */
    542 			.prog = &ctx->blit_prog[0],
    543 			.key = {
    544 				.half_precision = fd_half_precision(pfb),
    545 			},
    546 	};
    547 	float x0, y0, x1, y1;
    548 	unsigned bin_w = tile->bin_w;
    549 	unsigned bin_h = tile->bin_h;
    550 	unsigned i;
    551 
    552 	/* write texture coordinates to vertexbuf: */
    553 	x0 = ((float)tile->xoff) / ((float)pfb->width);
    554 	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
    555 	y0 = ((float)tile->yoff) / ((float)pfb->height);
    556 	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
    557 
    558 	OUT_PKT3(ring, CP_MEM_WRITE, 5);
    559 	OUT_RELOCW(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
    560 	OUT_RING(ring, fui(x0));
    561 	OUT_RING(ring, fui(y0));
    562 	OUT_RING(ring, fui(x1));
    563 	OUT_RING(ring, fui(y1));
    564 
    565 	fd3_emit_cache_flush(batch, ring);
    566 
    567 	for (i = 0; i < 4; i++) {
    568 		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
    569 		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
    570 				A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
    571 				A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
    572 
    573 		OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
    574 		OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
    575 				A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
    576 				A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
    577 				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
    578 				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
    579 				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
    580 	}
    581 
    582 	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
    583 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) |
    584 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
    585 
    586 	fd_wfi(batch, ring);
    587 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
    588 	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
    589 
    590 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
    591 	OUT_RING(ring, 0);
    592 	OUT_RING(ring, 0);
    593 
    594 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
    595 	OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);   /* GRAS_CL_CLIP_CNTL */
    596 
    597 	fd_wfi(batch, ring);
    598 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
    599 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5));
    600 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0));
    601 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5));
    602 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0));
    603 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
    604 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
    605 
    606 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
    607 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
    608 			A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
    609 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
    610 			A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
    611 
    612 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
    613 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
    614 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
    615 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
    616 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
    617 
    618 	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
    619 	OUT_RING(ring, 0x2 |
    620 			A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
    621 			A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
    622 			A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
    623 			A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
    624 			A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
    625 			A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
    626 			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
    627 			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
    628 
    629 	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
    630 	OUT_RING(ring, 0); /* RB_STENCIL_INFO */
    631 	OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
    632 
    633 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
    634 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    635 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    636 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
    637 
    638 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
    639 	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) |
    640 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
    641 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
    642 			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
    643 
    644 	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
    645 	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
    646 	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
    647 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
    648 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
    649 
    650 	fd3_emit_vertex_bufs(ring, &emit);
    651 
    652 	/* for gmem pitch/base calculations, we need to use the non-
    653 	 * truncated tile sizes:
    654 	 */
    655 	bin_w = gmem->bin_w;
    656 	bin_h = gmem->bin_h;
    657 
    658 	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
    659 		emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
    660 		emit.fp = NULL;      /* frag shader changed so clear cache */
    661 		fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
    662 		emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
    663 	}
    664 
    665 	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
    666 		if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
    667 			pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) {
    668 			/* Non-float can use a regular color write. It's split over 8-bit
    669 			 * components, so half precision is always sufficient.
    670 			 */
    671 			emit.prog = &ctx->blit_prog[0];
    672 			emit.key.half_precision = true;
    673 		} else {
    674 			/* Float depth needs special blit shader that writes depth */
    675 			if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
    676 				emit.prog = &ctx->blit_z;
    677 			else
    678 				emit.prog = &ctx->blit_zs;
    679 			emit.key.half_precision = false;
    680 		}
    681 		emit.fp = NULL;      /* frag shader changed so clear cache */
    682 		fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
    683 		emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
    684 	}
    685 
    686 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
    687 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    688 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    689 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
    690 
    691 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
    692 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    693 				   A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
    694 				   A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
    695 }
    696 
    697 static void
    698 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
    699 {
    700 	unsigned i;
    701 	for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
    702 		struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
    703 		*patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
    704 	}
    705 	util_dynarray_resize(&batch->draw_patches, 0);
    706 }
    707 
    708 static void
    709 patch_rbrc(struct fd_batch *batch, uint32_t val)
    710 {
    711 	unsigned i;
    712 	for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) {
    713 		struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i);
    714 		*patch->cs = patch->val | val;
    715 	}
    716 	util_dynarray_resize(&batch->rbrc_patches, 0);
    717 }
    718 
    719 /* for rendering directly to system memory: */
    720 static void
    721 fd3_emit_sysmem_prep(struct fd_batch *batch)
    722 {
    723 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    724 	struct fd_ringbuffer *ring = batch->gmem;
    725 	uint32_t i, pitch = 0;
    726 
    727 	for (i = 0; i < pfb->nr_cbufs; i++) {
    728 		struct pipe_surface *psurf = pfb->cbufs[i];
    729 		if (!psurf)
    730 			continue;
    731 		pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch;
    732 	}
    733 
    734 	fd3_emit_restore(batch, ring);
    735 
    736 	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
    737 	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
    738 			A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
    739 
    740 	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
    741 
    742 	/* setup scissor/offset for current tile: */
    743 	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
    744 	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
    745 			A3XX_RB_WINDOW_OFFSET_Y(0));
    746 
    747 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
    748 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
    749 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
    750 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
    751 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
    752 
    753 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
    754 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    755 			A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
    756 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
    757 			A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
    758 
    759 	patch_draws(batch, IGNORE_VISIBILITY);
    760 	patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
    761 }
    762 
    763 static void
    764 update_vsc_pipe(struct fd_batch *batch)
    765 {
    766 	struct fd_context *ctx = batch->ctx;
    767 	struct fd3_context *fd3_ctx = fd3_context(ctx);
    768 	struct fd_ringbuffer *ring = batch->gmem;
    769 	int i;
    770 
    771 	OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
    772 	OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
    773 
    774 	for (i = 0; i < 8; i++) {
    775 		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
    776 
    777 		if (!pipe->bo) {
    778 			pipe->bo = fd_bo_new(ctx->dev, 0x40000,
    779 					DRM_FREEDRENO_GEM_TYPE_KMEM);
    780 		}
    781 
    782 		OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);
    783 		OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
    784 				A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
    785 				A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
    786 				A3XX_VSC_PIPE_CONFIG_H(pipe->h));
    787 		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);       /* VSC_PIPE[i].DATA_ADDRESS */
    788 		OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE[i].DATA_LENGTH */
    789 	}
    790 }
    791 
    792 static void
    793 emit_binning_pass(struct fd_batch *batch)
    794 {
    795 	struct fd_context *ctx = batch->ctx;
    796 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
    797 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    798 	struct fd_ringbuffer *ring = batch->gmem;
    799 	int i;
    800 
    801 	uint32_t x1 = gmem->minx;
    802 	uint32_t y1 = gmem->miny;
    803 	uint32_t x2 = gmem->minx + gmem->width - 1;
    804 	uint32_t y2 = gmem->miny + gmem->height - 1;
    805 
    806 	if (ctx->screen->gpu_id == 320) {
    807 		emit_binning_workaround(batch);
    808 		fd_wfi(batch, ring);
    809 		OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
    810 		OUT_RING(ring, 0x00007fff);
    811 	}
    812 
    813 	OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
    814 	OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);
    815 
    816 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
    817 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
    818 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    819 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
    820 
    821 	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
    822 	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
    823 			A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
    824 
    825 	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
    826 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
    827 			A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
    828 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
    829 
    830 	/* setup scissor/offset for whole screen: */
    831 	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
    832 	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(x1) |
    833 			A3XX_RB_WINDOW_OFFSET_Y(y1));
    834 
    835 	OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
    836 	OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);
    837 
    838 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
    839 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
    840 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
    841 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
    842 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
    843 
    844 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
    845 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
    846 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
    847 			A3XX_RB_MODE_CONTROL_MRT(0));
    848 
    849 	for (i = 0; i < 4; i++) {
    850 		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
    851 		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
    852 				A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
    853 				A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));
    854 	}
    855 
    856 	OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
    857 	OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) |
    858 			A3XX_PC_VSTREAM_CONTROL_N(0));
    859 
    860 	/* emit IB to binning drawcmds: */
    861 	ctx->emit_ib(ring, batch->binning);
    862 	fd_reset_wfi(batch);
    863 
    864 	fd_wfi(batch, ring);
    865 
    866 	/* and then put stuff back the way it was: */
    867 
    868 	OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
    869 	OUT_RING(ring, 0x00000000);
    870 
    871 	OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
    872 	OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |
    873 			A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |
    874 			A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
    875 			A3XX_SP_SP_CTRL_REG_L0MODE(0));
    876 
    877 	OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
    878 	OUT_RING(ring, 0x00000000);
    879 
    880 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
    881 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    882 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    883 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
    884 
    885 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
    886 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    887 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
    888 			A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));
    889 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
    890 			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
    891 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
    892 
    893 	fd_event_write(batch, ring, CACHE_FLUSH);
    894 	fd_wfi(batch, ring);
    895 
    896 	if (ctx->screen->gpu_id == 320) {
    897 		/* dummy-draw workaround: */
    898 		OUT_PKT3(ring, CP_DRAW_INDX, 3);
    899 		OUT_RING(ring, 0x00000000);
    900 		OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
    901 							INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0));
    902 		OUT_RING(ring, 0);             /* NumIndices */
    903 		fd_reset_wfi(batch);
    904 	}
    905 
    906 	OUT_PKT3(ring, CP_NOP, 4);
    907 	OUT_RING(ring, 0x00000000);
    908 	OUT_RING(ring, 0x00000000);
    909 	OUT_RING(ring, 0x00000000);
    910 	OUT_RING(ring, 0x00000000);
    911 
    912 	fd_wfi(batch, ring);
    913 
    914 	if (ctx->screen->gpu_id == 320) {
    915 		emit_binning_workaround(batch);
    916 	}
    917 }
    918 
    919 /* before first tile */
    920 static void
    921 fd3_emit_tile_init(struct fd_batch *batch)
    922 {
    923 	struct fd_ringbuffer *ring = batch->gmem;
    924 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    925 	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
    926 	uint32_t rb_render_control;
    927 
    928 	fd3_emit_restore(batch, ring);
    929 
    930 	/* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated
    931 	 * at the right and bottom edge tiles
    932 	 */
    933 	OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
    934 	OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
    935 			A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
    936 
    937 	update_vsc_pipe(batch);
    938 
    939 	fd_wfi(batch, ring);
    940 	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
    941 	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
    942 			A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
    943 
    944 	if (use_hw_binning(batch)) {
    945 		/* emit hw binning pass: */
    946 		emit_binning_pass(batch);
    947 
    948 		patch_draws(batch, USE_VISIBILITY);
    949 	} else {
    950 		patch_draws(batch, IGNORE_VISIBILITY);
    951 	}
    952 
    953 	rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
    954 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w);
    955 
    956 	patch_rbrc(batch, rb_render_control);
    957 }
    958 
    959 /* before mem2gmem */
    960 static void
    961 fd3_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
    962 {
    963 	struct fd_ringbuffer *ring = batch->gmem;
    964 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    965 
    966 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
    967 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    968 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
    969 			A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
    970 }
    971 
    972 /* before IB to rendering cmds: */
    973 static void
    974 fd3_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
    975 {
    976 	struct fd_context *ctx = batch->ctx;
    977 	struct fd3_context *fd3_ctx = fd3_context(ctx);
    978 	struct fd_ringbuffer *ring = batch->gmem;
    979 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
    980 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
    981 
    982 	uint32_t x1 = tile->xoff;
    983 	uint32_t y1 = tile->yoff;
    984 	uint32_t x2 = tile->xoff + tile->bin_w - 1;
    985 	uint32_t y2 = tile->yoff + tile->bin_h - 1;
    986 
    987 	uint32_t reg;
    988 
    989 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
    990 	reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
    991 	if (pfb->zsbuf) {
    992 		reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
    993 	}
    994 	OUT_RING(ring, reg);
    995 	if (pfb->zsbuf) {
    996 		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
    997 		OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w));
    998 		if (rsc->stencil) {
    999 			OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
   1000 			OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
   1001 			OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
   1002 		}
   1003 	} else {
   1004 		OUT_RING(ring, 0x00000000);
   1005 	}
   1006 
   1007 	if (use_hw_binning(batch)) {
   1008 		struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
   1009 
   1010 		assert(pipe->w * pipe->h);
   1011 
   1012 		fd_event_write(batch, ring, HLSQ_FLUSH);
   1013 		fd_wfi(batch, ring);
   1014 
   1015 		OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
   1016 		OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
   1017 				A3XX_PC_VSTREAM_CONTROL_N(tile->n));
   1018 
   1019 
   1020 		OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
   1021 		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);    /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
   1022 		OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
   1023 				(tile->p * 4), 0, 0);
   1024 	} else {
   1025 		OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
   1026 		OUT_RING(ring, 0x00000000);
   1027 	}
   1028 
   1029 	OUT_PKT3(ring, CP_SET_BIN, 3);
   1030 	OUT_RING(ring, 0x00000000);
   1031 	OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
   1032 	OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
   1033 
   1034 	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true);
   1035 
   1036 	/* setup scissor/offset for current tile: */
   1037 	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
   1038 	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) |
   1039 			A3XX_RB_WINDOW_OFFSET_Y(tile->yoff));
   1040 
   1041 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
   1042 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
   1043 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
   1044 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
   1045 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
   1046 }
   1047 
   1048 void
   1049 fd3_gmem_init(struct pipe_context *pctx)
   1050 {
   1051 	struct fd_context *ctx = fd_context(pctx);
   1052 
   1053 	ctx->emit_sysmem_prep = fd3_emit_sysmem_prep;
   1054 	ctx->emit_tile_init = fd3_emit_tile_init;
   1055 	ctx->emit_tile_prep = fd3_emit_tile_prep;
   1056 	ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem;
   1057 	ctx->emit_tile_renderprep = fd3_emit_tile_renderprep;
   1058 	ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem;
   1059 }
   1060