      1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
      2 
      3 /*
      4  * Copyright (C) 2014 Rob Clark <robclark (at) freedesktop.org>
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     23  * SOFTWARE.
     24  *
     25  * Authors:
     26  *    Rob Clark <robclark (at) freedesktop.org>
     27  */
     28 
     29 #include "pipe/p_state.h"
     30 #include "util/u_string.h"
     31 #include "util/u_memory.h"
     32 #include "util/u_helpers.h"
     33 #include "util/u_format.h"
     34 #include "util/u_viewport.h"
     35 
     36 #include "freedreno_resource.h"
     37 #include "freedreno_query_hw.h"
     38 
     39 #include "fd4_emit.h"
     40 #include "fd4_blend.h"
     41 #include "fd4_context.h"
     42 #include "fd4_program.h"
     43 #include "fd4_rasterizer.h"
     44 #include "fd4_texture.h"
     45 #include "fd4_format.h"
     46 #include "fd4_zsa.h"
     47 
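/* map shader stage (shader_t) to the CP_LOAD_STATE state-block used on a4xx: */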
     48 static const enum adreno_state_block sb[] = {
     49 	[SHADER_VERTEX]   = SB_VERT_SHADER,
     50 	[SHADER_FRAGMENT] = SB_FRAG_SHADER,
     51 };
     52 
     53 /* regid:          base const register
     54  * prsc or dwords: buffer containing constant values
     55  * sizedwords:     size of const value buffer
     56  */
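/* Example usage (hypothetical values): load 8 dwords of immediate constants
 * for the vertex shader starting at const register offset 16; the packet's
 * DST_OFF/NUM_UNIT fields are in units of 4 dwords (vec4s), hence the /4
 * and the alignment asserts below:
 *
 *    fd4_emit_const(ring, SHADER_VERTEX, 16, 0, 8, dwords, NULL);
 *
 * or, sourcing the values from a BO instead of a cpu-side array:
 *
 *    fd4_emit_const(ring, SHADER_VERTEX, 16, 0, 8, NULL, prsc);
 */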
     57 static void
     58 fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
     59 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
     60 		const uint32_t *dwords, struct pipe_resource *prsc)
     61 {
     62 	uint32_t i, sz;
     63 	enum adreno_state_src src;
     64 
     65 	debug_assert((regid % 4) == 0);
     66 	debug_assert((sizedwords % 4) == 0);
     67 
     68 	if (prsc) {
     69 		sz = 0;
     70 		src = 0x2;  // TODO ??
     71 	} else {
     72 		sz = sizedwords;
     73 		src = SS_DIRECT;
     74 	}
     75 
     76 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
     77 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
     78 			CP_LOAD_STATE_0_STATE_SRC(src) |
     79 			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
     80 			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
     81 	if (prsc) {
     82 		struct fd_bo *bo = fd_resource(prsc)->bo;
     83 		OUT_RELOC(ring, bo, offset,
     84 				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
     85 	} else {
     86 		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
     87 				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
     88 		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
     89 	}
     90 	for (i = 0; i < sz; i++) {
     91 		OUT_RING(ring, dwords[i]);
     92 	}
     93 }
     94 
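/* Emit an array of buffer addresses into the const file, one dword per
 * entry, padded out to a vec4 boundary (presumably consumed by the shader
 * as a table of buffer pointers, e.g. for UBOs).  Unbound slots get a
 * recognizable 0xbad00000 marker so they stand out in cmdstream dumps.
 */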
     95 static void
     96 fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
     97 		uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
     98 {
     99 	uint32_t anum = align(num, 4);
    100 	uint32_t i;
    101 
    102 	debug_assert((regid % 4) == 0);
    103 
    104 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum);
    105 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
    106 			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
    107 			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
    108 			CP_LOAD_STATE_0_NUM_UNIT(anum/4));
    109 	OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
    110 			CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
    111 
    112 	for (i = 0; i < num; i++) {
    113 		if (prscs[i]) {
    114 			if (write) {
    115 				OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
    116 			} else {
    117 				OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
    118 			}
    119 		} else {
    120 			OUT_RING(ring, 0xbad00000 | (i << 16));
    121 		}
    122 	}
    123 
    124 	for (; i < anum; i++)
    125 		OUT_RING(ring, 0xffffffff);
    126 }
    127 
    128 static void
    129 emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
    130 		enum adreno_state_block sb, struct fd_texture_stateobj *tex,
    131 		const struct ir3_shader_variant *v)
    132 {
    133 	static const uint32_t bcolor_reg[] = {
    134 			[SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
    135 			[SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
    136 	};
    137 	struct fd4_context *fd4_ctx = fd4_context(ctx);
    138 	bool needs_border = false;
    139 	unsigned i;
    140 
    141 	if (tex->num_samplers > 0) {
    142 		int num_samplers;
    143 
    144 		/* not sure if this is an a420.0 workaround, but we seem
    145 		 * to need to emit these in pairs.. emit a final dummy
    146 		 * entry if odd # of samplers:
    147 		 */
    148 		num_samplers = align(tex->num_samplers, 2);
    149 
    150 		/* output sampler state: */
    151 		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers));
    152 		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
    153 				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
    154 				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
    155 				CP_LOAD_STATE_0_NUM_UNIT(num_samplers));
    156 		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
    157 				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
    158 		for (i = 0; i < tex->num_samplers; i++) {
    159 			static const struct fd4_sampler_stateobj dummy_sampler = {};
    160 			const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
    161 					fd4_sampler_stateobj(tex->samplers[i]) :
    162 					&dummy_sampler;
    163 			OUT_RING(ring, sampler->texsamp0);
    164 			OUT_RING(ring, sampler->texsamp1);
    165 
    166 			needs_border |= sampler->needs_border;
    167 		}
    168 
    169 		for (; i < num_samplers; i++) {
    170 			OUT_RING(ring, 0x00000000);
    171 			OUT_RING(ring, 0x00000000);
    172 		}
    173 	}
    174 
    175 	if (tex->num_textures > 0) {
    176 		unsigned num_textures = tex->num_textures + v->astc_srgb.count;
    177 
    178 		/* emit texture state: */
    179 		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * num_textures));
    180 		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
    181 				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
    182 				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
    183 				CP_LOAD_STATE_0_NUM_UNIT(num_textures));
    184 		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
    185 				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
    186 		for (i = 0; i < tex->num_textures; i++) {
    187 			static const struct fd4_pipe_sampler_view dummy_view = {};
    188 			const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
    189 					fd4_pipe_sampler_view(tex->textures[i]) :
    190 					&dummy_view;
    191 
    192 			OUT_RING(ring, view->texconst0);
    193 			OUT_RING(ring, view->texconst1);
    194 			OUT_RING(ring, view->texconst2);
    195 			OUT_RING(ring, view->texconst3);
    196 			if (view->base.texture) {
    197 				struct fd_resource *rsc = fd_resource(view->base.texture);
    198 				OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
    199 			} else {
    200 				OUT_RING(ring, 0x00000000);
    201 			}
    202 			OUT_RING(ring, 0x00000000);
    203 			OUT_RING(ring, 0x00000000);
    204 			OUT_RING(ring, 0x00000000);
    205 		}
    206 
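		/* For ASTC sRGB textures, emit a second copy of the texture state
		 * with the SRGB bit cleared; the shader variant presumably samples
		 * these extra slots to get the raw texels and applies the sRGB
		 * conversion itself:
		 */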
    207 		for (i = 0; i < v->astc_srgb.count; i++) {
    208 			static const struct fd4_pipe_sampler_view dummy_view = {};
    209 			const struct fd4_pipe_sampler_view *view;
    210 			unsigned idx = v->astc_srgb.orig_idx[i];
    211 
    212 			view = tex->textures[idx] ?
    213 					fd4_pipe_sampler_view(tex->textures[idx]) :
    214 					&dummy_view;
    215 
    216 			debug_assert(view->texconst0 & A4XX_TEX_CONST_0_SRGB);
    217 
    218 			OUT_RING(ring, view->texconst0 & ~A4XX_TEX_CONST_0_SRGB);
    219 			OUT_RING(ring, view->texconst1);
    220 			OUT_RING(ring, view->texconst2);
    221 			OUT_RING(ring, view->texconst3);
    222 			if (view->base.texture) {
    223 				struct fd_resource *rsc = fd_resource(view->base.texture);
    224 				OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
    225 			} else {
    226 				OUT_RING(ring, 0x00000000);
    227 			}
    228 			OUT_RING(ring, 0x00000000);
    229 			OUT_RING(ring, 0x00000000);
    230 			OUT_RING(ring, 0x00000000);
    231 		}
    232 	} else {
    233 		debug_assert(v->astc_srgb.count == 0);
    234 	}
    235 
    236 	if (needs_border) {
    237 		unsigned off;
    238 		void *ptr;
    239 
    240 		u_upload_alloc(fd4_ctx->border_color_uploader,
    241 				0, BORDER_COLOR_UPLOAD_SIZE,
    242 				BORDER_COLOR_UPLOAD_SIZE, &off,
    243 				&fd4_ctx->border_color_buf,
    244 				&ptr);
    245 
    246 		fd_setup_border_colors(tex, ptr, 0);
    247 		OUT_PKT0(ring, bcolor_reg[sb], 1);
    248 		OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
    249 
    250 		u_upload_unmap(fd4_ctx->border_color_uploader);
    251 	}
    252 }
    253 
    254 /* emit texture state for mem->gmem restore operation.. eventually it would
    255  * be good to get rid of this and use normal CSO/etc state for more of these
    256  * special cases..
    257  */
    258 void
    259 fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
    260 		struct pipe_surface **bufs)
    261 {
    262 	unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
    263 	int i;
    264 
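	/* per-MRT component write mask: enable all components for each buffer
	 * being restored; the z32 special case below clears the mask again so
	 * no color target is written for depth-only restores:
	 */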
    265 	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
    266 		mrt_comp[i] = (i < nr_bufs) ? 0xf : 0;
    267 	}
    268 
    269 	/* output sampler state: */
    270 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs));
    271 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
    272 			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
    273 			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
    274 			CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
    275 	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
    276 			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
    277 	for (i = 0; i < nr_bufs; i++) {
    278 		OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
    279 				A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
    280 				A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
    281 				A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
    282 				A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
    283 		OUT_RING(ring, 0x00000000);
    284 	}
    285 
    286 	/* emit texture state: */
    287 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * nr_bufs));
    288 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
    289 			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
    290 			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
    291 			CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
    292 	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
    293 			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
    294 	for (i = 0; i < nr_bufs; i++) {
    295 		if (bufs[i]) {
    296 			struct fd_resource *rsc = fd_resource(bufs[i]->texture);
    297 			enum pipe_format format = fd_gmem_restore_format(bufs[i]->format);
    298 
    299 			/* The restore blit_zs shader expects stencil in sampler 0,
    300 			 * and depth in sampler 1
    301 			 */
    302 			if (rsc->stencil && (i == 0)) {
    303 				rsc = rsc->stencil;
    304 				format = fd_gmem_restore_format(rsc->base.b.format);
    305 			}
    306 
    307 			/* note: PIPE_BUFFER disallowed for surfaces */
    308 			unsigned lvl = bufs[i]->u.tex.level;
    309 			struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
    310 			unsigned offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer);
    311 
			/* z32 restore is accomplished using depth write.  If there is
			 * no stencil component (ie. PIPE_FORMAT_Z32_FLOAT) then there
			 * is no render target:
			 *
			 * (The same applies for z32_s8x24, since for the stencil sampler
			 * state the 'if' above will have replaced 'format' with s8)
			 */
    319 			if ((format == PIPE_FORMAT_Z32_FLOAT) ||
    320 					(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT))
    321 				mrt_comp[i] = 0;
    322 
    323 			debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer);
    324 
    325 			OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
    326 					A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
    327 					fd4_tex_swiz(format,  PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
    328 							PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
    329 			OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
    330 					A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
    331 			OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
    332 					A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format)));
    333 			OUT_RING(ring, 0x00000000);
    334 			OUT_RELOC(ring, rsc->bo, offset, 0, 0);
    335 			OUT_RING(ring, 0x00000000);
    336 			OUT_RING(ring, 0x00000000);
    337 			OUT_RING(ring, 0x00000000);
    338 		} else {
    339 			OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) |
    340 					A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
    341 					A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) |
    342 					A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) |
    343 					A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) |
    344 					A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE));
    345 			OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) |
    346 					A4XX_TEX_CONST_1_HEIGHT(0));
    347 			OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0));
    348 			OUT_RING(ring, 0x00000000);
    349 			OUT_RING(ring, 0x00000000);
    350 			OUT_RING(ring, 0x00000000);
    351 			OUT_RING(ring, 0x00000000);
    352 			OUT_RING(ring, 0x00000000);
    353 		}
    354 	}
    355 
    356 	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
    357 	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
    358 			A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
    359 			A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
    360 			A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
    361 			A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
    362 			A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
    363 			A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
    364 			A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
    365 }
    366 
    367 void
    368 fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
    369 {
    370 	int32_t i, j, last = -1;
    371 	uint32_t total_in = 0;
    372 	const struct fd_vertex_state *vtx = emit->vtx;
    373 	const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
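	/* regid(63, 0) is used as the "not used" marker; these only change if
	 * the shader actually consumes the corresponding sysval:
	 */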
    374 	unsigned vertex_regid = regid(63, 0);
    375 	unsigned instance_regid = regid(63, 0);
    376 	unsigned vtxcnt_regid = regid(63, 0);
    377 
    378 	/* Note that sysvals come *after* normal inputs: */
    379 	for (i = 0; i < vp->inputs_count; i++) {
    380 		if (!vp->inputs[i].compmask)
    381 			continue;
    382 		if (vp->inputs[i].sysval) {
    383 			switch(vp->inputs[i].slot) {
    384 			case SYSTEM_VALUE_BASE_VERTEX:
    385 				/* handled elsewhere */
    386 				break;
    387 			case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
    388 				vertex_regid = vp->inputs[i].regid;
    389 				break;
    390 			case SYSTEM_VALUE_INSTANCE_ID:
    391 				instance_regid = vp->inputs[i].regid;
    392 				break;
    393 			case SYSTEM_VALUE_VERTEX_CNT:
    394 				vtxcnt_regid = vp->inputs[i].regid;
    395 				break;
    396 			default:
    397 				unreachable("invalid system value");
    398 				break;
    399 			}
    400 		} else if (i < vtx->vtx->num_elements) {
    401 			last = i;
    402 		}
    403 	}
    404 
    405 	for (i = 0, j = 0; i <= last; i++) {
    406 		assert(!vp->inputs[i].sysval);
    407 		if (vp->inputs[i].compmask) {
    408 			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
    409 			const struct pipe_vertex_buffer *vb =
    410 					&vtx->vertexbuf.vb[elem->vertex_buffer_index];
    411 			struct fd_resource *rsc = fd_resource(vb->buffer);
    412 			enum pipe_format pfmt = elem->src_format;
    413 			enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
    414 			bool switchnext = (i != last) ||
    415 					(vertex_regid != regid(63, 0)) ||
    416 					(instance_regid != regid(63, 0)) ||
    417 					(vtxcnt_regid != regid(63, 0));
    418 			bool isint = util_format_is_pure_integer(pfmt);
    419 			uint32_t fs = util_format_get_blocksize(pfmt);
    420 			uint32_t off = vb->buffer_offset + elem->src_offset;
    421 			uint32_t size = fd_bo_size(rsc->bo) - off;
    422 			debug_assert(fmt != ~0);
    423 
    424 			OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
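			/* each active attribute gets a VFD_FETCH (where to pull data
			 * from) plus a matching VFD_DECODE (how to unpack it into the
			 * shader's input register); SWITCHNEXT appears to chain on to
			 * the next fetch instruction while more attributes or sysvals
			 * follow:
			 */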
    425 			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
    426 					A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
    427 					COND(elem->instance_divisor, A4XX_VFD_FETCH_INSTR_0_INSTANCED) |
    428 					COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
    429 			OUT_RELOC(ring, rsc->bo, off, 0, 0);
    430 			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
    431 			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(MAX2(1, elem->instance_divisor)));
    432 
    433 			OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
    434 			OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
    435 					A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
    436 					A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
    437 					A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
    438 					A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
    439 					A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
    440 					A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
    441 					COND(isint, A4XX_VFD_DECODE_INSTR_INT) |
    442 					COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
    443 
    444 			total_in += vp->inputs[i].ncomp;
    445 			j++;
    446 		}
    447 	}
    448 
    449 	/* hw doesn't like to be configured for zero vbo's, it seems: */
    450 	if (last < 0) {
    451 		/* just recycle the shader bo, we just need to point to *something*
    452 		 * valid:
    453 		 */
    454 		struct fd_bo *dummy_vbo = vp->bo;
    455 		bool switchnext = (vertex_regid != regid(63, 0)) ||
    456 				(instance_regid != regid(63, 0)) ||
    457 				(vtxcnt_regid != regid(63, 0));
    458 
    459 		OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
    460 		OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
    461 				A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
    462 				COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
    463 		OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
    464 		OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
    465 		OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
    466 
    467 		OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
    468 		OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
    469 				A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
    470 				A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
    471 				A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
    472 				A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
    473 				A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
    474 				A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
    475 				COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
    476 
    477 		total_in = 1;
    478 		j = 1;
    479 	}
    480 
    481 	OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
    482 	OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
    483 			0xa0000 | /* XXX */
    484 			A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
    485 			A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
    486 	OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
    487 			A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
    488 			A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
    489 	OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_2 */
    490 	OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
    491 	OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_4 */
    492 
    493 	/* cache invalidate, otherwise vertex fetch could see
    494 	 * stale vbo contents:
    495 	 */
    496 	OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
    497 	OUT_RING(ring, 0x00000000);
    498 	OUT_RING(ring, 0x00000012);
    499 }
    500 
    501 void
    502 fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
    503 		struct fd4_emit *emit)
    504 {
    505 	const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
    506 	const struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
    507 	uint32_t dirty = emit->dirty;
    508 
    509 	emit_marker(ring, 5);
    510 
    511 	if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
    512 		struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
    513 		unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
    514 
    515 		for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
    516 			mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
    517 		}
    518 
    519 		OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
    520 		OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
    521 				A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
    522 				A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
    523 				A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
    524 				A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
    525 				A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
    526 				A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
    527 				A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
    528 	}
    529 
    530 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
    531 		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
    532 		struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
    533 		uint32_t rb_alpha_control = zsa->rb_alpha_control;
    534 
    535 		if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
    536 			rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
    537 
    538 		OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
    539 		OUT_RING(ring, rb_alpha_control);
    540 
    541 		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
    542 		OUT_RING(ring, zsa->rb_stencil_control);
    543 		OUT_RING(ring, zsa->rb_stencil_control2);
    544 	}
    545 
    546 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
    547 		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
    548 		struct pipe_stencil_ref *sr = &ctx->stencil_ref;
    549 
    550 		OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
    551 		OUT_RING(ring, zsa->rb_stencilrefmask |
    552 				A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
    553 		OUT_RING(ring, zsa->rb_stencilrefmask_bf |
    554 				A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
    555 	}
    556 
    557 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
    558 		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
    559 		bool fragz = fp->has_kill | fp->writes_pos;
    560 		bool clamp = !ctx->rasterizer->depth_clip;
    561 
    562 		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
    563 		OUT_RING(ring, zsa->rb_depth_control |
    564 				COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
    565 				COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
    566 				COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
    567 
    568 		/* maybe this register/bitfield needs a better name.. this
    569 		 * appears to be just disabling early-z
    570 		 */
    571 		OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
    572 		OUT_RING(ring, zsa->gras_alpha_control |
    573 				COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) |
    574 				COND(fragz && fp->frag_coord, A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS));
    575 	}
    576 
    577 	if (dirty & FD_DIRTY_RASTERIZER) {
    578 		struct fd4_rasterizer_stateobj *rasterizer =
    579 				fd4_rasterizer_stateobj(ctx->rasterizer);
    580 
    581 		OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
    582 		OUT_RING(ring, rasterizer->gras_su_mode_control |
    583 				A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
    584 
    585 		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
    586 		OUT_RING(ring, rasterizer->gras_su_point_minmax);
    587 		OUT_RING(ring, rasterizer->gras_su_point_size);
    588 
    589 		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
    590 		OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
    591 		OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
    592 
    593 		OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
    594 		OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
    595 	}
    596 
    597 	/* NOTE: since primitive_restart is not actually part of any
    598 	 * state object, we need to make sure that we always emit
    599 	 * PRIM_VTX_CNTL.. either that or be more clever and detect
    600 	 * when it changes.
    601 	 */
    602 	if (emit->info) {
    603 		const struct pipe_draw_info *info = emit->info;
    604 		struct fd4_rasterizer_stateobj *rast =
    605 			fd4_rasterizer_stateobj(ctx->rasterizer);
    606 		uint32_t val = rast->pc_prim_vtx_cntl;
    607 
    608 		if (info->indexed && info->primitive_restart)
    609 			val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
    610 
    611 		val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
    612 
    613 		if (fp->total_in > 0) {
    614 			uint32_t varout = align(fp->total_in, 16) / 16;
    615 			if (varout > 1)
    616 				varout = align(varout, 2);
    617 			val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
    618 		}
    619 
    620 		OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
    621 		OUT_RING(ring, val);
    622 		OUT_RING(ring, rast->pc_prim_vtx_cntl2);
    623 	}
    624 
    625 	if (dirty & FD_DIRTY_SCISSOR) {
    626 		struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
    627 
    628 		OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
    629 		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
    630 				A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
    631 		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
    632 				A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
    633 
    634 		ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
    635 		ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
    636 		ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
    637 		ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
    638 	}
    639 
    640 	if (dirty & FD_DIRTY_VIEWPORT) {
    641 		fd_wfi(ctx->batch, ring);
    642 		OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
    643 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
    644 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
    645 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
    646 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
    647 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
    648 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
    649 	}
    650 
    651 	if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
    652 		float zmin, zmax;
    653 		int depth = 24;
    654 		if (ctx->batch->framebuffer.zsbuf) {
    655 			depth = util_format_get_component_bits(
    656 					pipe_surface_format(ctx->batch->framebuffer.zsbuf),
    657 					UTIL_FORMAT_COLORSPACE_ZS, 0);
    658 		}
    659 		util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
    660 								&zmin, &zmax);
    661 
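		/* RB_VPORT_Z_CLAMP takes raw float bounds for a 32-bit float depth
		 * buffer, otherwise fixed-point values scaled to the depth buffer's
		 * range (16- or 24-bit):
		 */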
    662 		OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
    663 		if (depth == 32) {
    664 			OUT_RING(ring, fui(zmin));
    665 			OUT_RING(ring, fui(zmax));
    666 		} else if (depth == 16) {
    667 			OUT_RING(ring, (uint32_t)(zmin * 0xffff));
    668 			OUT_RING(ring, (uint32_t)(zmax * 0xffff));
    669 		} else {
    670 			OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
    671 			OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
    672 		}
    673 	}
    674 
    675 	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
    676 		struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
    677 		unsigned n = pfb->nr_cbufs;
		/* if we have depth/stencil, we need at least one MRT: */
    679 		if (pfb->zsbuf)
    680 			n = MAX2(1, n);
    681 		fd4_program_emit(ring, emit, n, pfb->cbufs);
    682 	}
    683 
    684 	if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
    685 		ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
    686 		if (!emit->key.binning_pass)
    687 			ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
    688 	}
    689 
    690 	if ((dirty & FD_DIRTY_BLEND)) {
    691 		struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
    692 		uint32_t i;
    693 
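		/* per-MRT blend state: pure-integer formats cannot blend, so only
		 * the component write mask is kept and the ROP is forced to COPY;
		 * formats without an alpha channel use the "no alpha" RGB factors
		 * instead (presumably so DST_ALPHA-style factors behave as if
		 * alpha were 1.0):
		 */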
    694 		for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
    695 			enum pipe_format format = pipe_surface_format(
    696 					ctx->batch->framebuffer.cbufs[i]);
    697 			bool is_int = util_format_is_pure_integer(format);
    698 			bool has_alpha = util_format_has_alpha(format);
    699 			uint32_t control = blend->rb_mrt[i].control;
    700 			uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
    701 
    702 			if (is_int) {
    703 				control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
    704 				control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
    705 			}
    706 
    707 			if (has_alpha) {
    708 				blend_control |= blend->rb_mrt[i].blend_control_rgb;
    709 			} else {
    710 				blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
    711 				control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
    712 			}
    713 
    714 			OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
    715 			OUT_RING(ring, control);
    716 
    717 			OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
    718 			OUT_RING(ring, blend_control);
    719 		}
    720 
    721 		OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
    722 		OUT_RING(ring, blend->rb_fs_output |
    723 				A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
    724 	}
    725 
    726 	if (dirty & FD_DIRTY_BLEND_COLOR) {
    727 		struct pipe_blend_color *bcolor = &ctx->blend_color;
    728 
    729 		OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
    730 		OUT_RING(ring, A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
    731 				A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) |
    732 				A4XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f));
    733 		OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
    734 		OUT_RING(ring, A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
    735 				A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) |
    736 				A4XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f));
		OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
    738 		OUT_RING(ring, A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
    739 				A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) |
    740 				A4XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f));
    741 		OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
    742 		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
    743 				A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) |
    744 				A4XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f));
    745 		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
    746 	}
    747 
    748 	if (dirty & FD_DIRTY_VERTTEX) {
    749 		if (vp->has_samp)
    750 			emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex, vp);
    751 		else
    752 			dirty &= ~FD_DIRTY_VERTTEX;
    753 	}
    754 
    755 	if (dirty & FD_DIRTY_FRAGTEX) {
    756 		if (fp->has_samp)
    757 			emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex, fp);
    758 		else
    759 			dirty &= ~FD_DIRTY_FRAGTEX;
    760 	}
    761 
    762 	ctx->dirty &= ~dirty;
    763 }
    764 
/* emit setup at the beginning of a new cmdstream buffer (don't rely on
 * previous state, there could have been a context switch between ioctls):
 */
    768 void
    769 fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
    770 {
    771 	struct fd_context *ctx = batch->ctx;
    772 	struct fd4_context *fd4_ctx = fd4_context(ctx);
    773 
    774 	OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
    775 	OUT_RING(ring, 0x00000001);
    776 
    777 	OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
    778 	OUT_RING(ring, 0x00000000);
    779 
    780 	OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1);
    781 	OUT_RING(ring, 0x00000006);
    782 
    783 	OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1);
    784 	OUT_RING(ring, 0x0000003a);
    785 
    786 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
    787 	OUT_RING(ring, 0x00000001);
    788 
    789 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
    790 	OUT_RING(ring, 0x00000000);
    791 
    792 	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
    793 	OUT_RING(ring, 0x00000007);
    794 
    795 	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
    796 	OUT_RING(ring, 0x00000000);
    797 
    798 	OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
    799 	OUT_RING(ring, 0x00000000);
    800 	OUT_RING(ring, 0x00000012);
    801 
    802 	OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1);
    803 	OUT_RING(ring, 0x00000000);
    804 
    805 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
    806 	OUT_RING(ring, 0x00000006);
    807 
    808 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
    809 	OUT_RING(ring, 0x00000000);
    810 
    811 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
    812 	OUT_RING(ring, 0x00040000);
    813 
    814 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
    815 	OUT_RING(ring, 0x00000000);
    816 
    817 	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
    818 	OUT_RING(ring, 0x00001000);
    819 
    820 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
    821 	OUT_RING(ring, 0x00000000);
    822 
    823 	OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
    824 	OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
    825 			A4XX_RB_BLEND_RED_FLOAT(0.0));
    826 	OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(0) |
    827 			A4XX_RB_BLEND_GREEN_FLOAT(0.0));
    828 	OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(0) |
    829 			A4XX_RB_BLEND_BLUE_FLOAT(0.0));
    830 	OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
    831 			A4XX_RB_BLEND_ALPHA_FLOAT(1.0));
    832 
    833 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
    834 	OUT_RING(ring, 0x00000000);
    835 
    836 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
    837 	OUT_RING(ring, 0x00000000);
    838 
    839 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
    840 	OUT_RING(ring, 0x00000000);
    841 
    842 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
    843 	OUT_RING(ring, 0x00000000);
    844 
    845 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
    846 	OUT_RING(ring, 0x00000000);
    847 
    848 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
    849 	OUT_RING(ring, 0x00000000);
    850 
    851 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
    852 	OUT_RING(ring, 0x0000001d);
    853 
    854 	OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
    855 	OUT_RING(ring, 0x00000000);
    856 
    857 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
    858 	OUT_RING(ring, 0x00000001);
    859 
    860 	OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
    861 	OUT_RING(ring, 0x00000000);
    862 
    863 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
    864 	OUT_RING(ring, 0x00000000);
    865 
    866 	OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
    867 	OUT_RING(ring, 0x00000000);
    868 
    869 	OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
    870 	OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) |
    871 			A4XX_TPL1_TP_TEX_COUNT_HS(0) |
    872 			A4XX_TPL1_TP_TEX_COUNT_DS(0) |
    873 			A4XX_TPL1_TP_TEX_COUNT_GS(0));
    874 
    875 	OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
    876 	OUT_RING(ring, 16);
    877 
    878 	/* we don't use this yet.. probably best to disable.. */
    879 	OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
    880 	OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
    881 			CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
    882 			CP_SET_DRAW_STATE__0_GROUP_ID(0));
    883 	OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
    884 
    885 	OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
    886 	OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_PARAM */
    887 	OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */
    888 
    889 	OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
    890 	OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_PARAM */
    891 	OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */
    892 
    893 	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
    894 	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
    895 			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
    896 			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
    897 			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
    898 
    899 	OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
    900 	OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
    901 			A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));
    902 
    903 	OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
    904 	OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
    905 			A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
    906 
    907 	OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
    908 	OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
    909 
    910 	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
    911 	OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
    912 
    913 	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
    914 	OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
    915 
    916 	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
    917 	OUT_RING(ring, 0x0);
    918 
    919 	fd_hw_query_enable(batch, ring);
    920 }
    921 
    922 static void
    923 fd4_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
    924 {
    925 	__OUT_IB(ring, true, target);
    926 }
    927 
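/* hook up the a4xx-specific emit entry points used by the core freedreno
 * code (const emission and IB chaining):
 */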
    928 void
    929 fd4_emit_init(struct pipe_context *pctx)
    930 {
    931 	struct fd_context *ctx = fd_context(pctx);
    932 	ctx->emit_const = fd4_emit_const;
    933 	ctx->emit_const_bo = fd4_emit_const_bo;
    934 	ctx->emit_ib = fd4_emit_ib;
    935 }
    936