Home | History | Annotate | Download | only in a5xx
      1 /*
      2  * Copyright (C) 2016 Rob Clark <robclark (at) freedesktop.org>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  *
     23  * Authors:
     24  *    Rob Clark <robclark (at) freedesktop.org>
     25  */
     26 
     27 #include "pipe/p_state.h"
     28 #include "util/u_string.h"
     29 #include "util/u_memory.h"
     30 #include "util/u_helpers.h"
     31 #include "util/u_format.h"
     32 #include "util/u_viewport.h"
     33 
     34 #include "freedreno_resource.h"
     35 #include "freedreno_query_hw.h"
     36 
     37 #include "fd5_emit.h"
     38 #include "fd5_blend.h"
     39 #include "fd5_blitter.h"
     40 #include "fd5_context.h"
     41 #include "fd5_image.h"
     42 #include "fd5_program.h"
     43 #include "fd5_rasterizer.h"
     44 #include "fd5_texture.h"
     45 #include "fd5_format.h"
     46 #include "fd5_zsa.h"
     47 
     48 /* regid:          base const register
     49  * prsc or dwords: buffer containing constant values
     50  * sizedwords:     size of const value buffer
     51  */
     52 static void
     53 fd5_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
     54 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
     55 		const uint32_t *dwords, struct pipe_resource *prsc)
     56 {
     57 	uint32_t i, sz;
     58 	enum a4xx_state_src src;
     59 
     60 	debug_assert((regid % 4) == 0);
     61 	debug_assert((sizedwords % 4) == 0);
     62 
     63 	if (prsc) {
     64 		sz = 0;
     65 		src = SS4_INDIRECT;
     66 	} else {
     67 		sz = sizedwords;
     68 		src = SS4_DIRECT;
     69 	}
     70 
     71 	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + sz);
     72 	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) |
     73 			CP_LOAD_STATE4_0_STATE_SRC(src) |
     74 			CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) |
     75 			CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4));
     76 	if (prsc) {
     77 		struct fd_bo *bo = fd_resource(prsc)->bo;
     78 		OUT_RELOC(ring, bo, offset,
     79 				CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0);
     80 	} else {
     81 		OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
     82 				CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
     83 		OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
     84 		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
     85 	}
     86 	for (i = 0; i < sz; i++) {
     87 		OUT_RING(ring, dwords[i]);
     88 	}
     89 }
     90 
     91 static void
     92 fd5_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
     93 		uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
     94 {
     95 	uint32_t anum = align(num, 2);
     96 	uint32_t i;
     97 
     98 	debug_assert((regid % 4) == 0);
     99 
    100 	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * anum));
    101 	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) |
    102 			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
    103 			CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) |
    104 			CP_LOAD_STATE4_0_NUM_UNIT(anum/2));
    105 	OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
    106 			CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
    107 	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
    108 
    109 	for (i = 0; i < num; i++) {
    110 		if (prscs[i]) {
    111 			if (write) {
    112 				OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
    113 			} else {
    114 				OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
    115 			}
    116 		} else {
    117 			OUT_RING(ring, 0xbad00000 | (i << 16));
    118 			OUT_RING(ring, 0xbad00000 | (i << 16));
    119 		}
    120 	}
    121 
    122 	for (; i < anum; i++) {
    123 		OUT_RING(ring, 0xffffffff);
    124 		OUT_RING(ring, 0xffffffff);
    125 	}
    126 }
    127 
/* Border color layout is different from the earlier generations (the
 * original note says "a4xx/a5xx"; NOTE(review): presumably a3xx/a4xx was
 * meant, since this is the fd5 code -- confirm).  If it turns out to be
 * the same as a6xx then move this somewhere common ;-)
 *
 * One entry stores a single border color in several encodings; it is
 * filled in by setup_border_colors().
 *
 * Entry layout looks like (total size, 0x60 bytes; the byte offsets in
 * the field comments assume PACKED leaves no padding between members,
 * which the STATIC_ASSERT on the entry size relies on):
 */

struct PACKED bcolor_entry {
	uint32_t fp32[4];    /* 0x00: 32-bit float bits (raw value for pure-integer formats) */
	uint16_t ui16[4];    /* 0x10: clamped [0,1] color scaled by 0xffff */
	int16_t  si16[4];    /* 0x18: clamped [-1,1] color scaled by 0x7fff */
	uint16_t fp16[4];    /* 0x20: half float (clamped integer for pure-integer formats) */
	uint16_t rgb565;     /* 0x28 */
	uint16_t rgb5a1;     /* 0x2a */
	uint16_t rgba4;      /* 0x2c */
	uint8_t __pad0[2];   /* 0x2e */
	uint8_t  ui8[4];     /* 0x30: clamped [0,1] color scaled by 0xff */
	int8_t   si8[4];     /* 0x34: clamped [-1,1] color scaled by 0x7f */
	uint32_t rgb10a2;    /* 0x38 */
	uint32_t z24; /* also s8? */ /* 0x3c: channel 0 scaled by 0xffffff */
	uint8_t  __pad1[32]; /* 0x40: pads the entry out to 0x60 bytes */
};

/* size of one entry, checked against sizeof(struct bcolor_entry) */
#define FD5_BORDER_COLOR_SIZE        0x60
/* upload size: room for PIPE_MAX_SAMPLERS entries for each of two shader stages */
#define FD5_BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * FD5_BORDER_COLOR_SIZE)
    152 
    153 static void
    154 setup_border_colors(struct fd_texture_stateobj *tex, struct bcolor_entry *entries)
    155 {
    156 	unsigned i, j;
    157 	STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE);
    158 
    159 	for (i = 0; i < tex->num_samplers; i++) {
    160 		struct bcolor_entry *e = &entries[i];
    161 		struct pipe_sampler_state *sampler = tex->samplers[i];
    162 		union pipe_color_union *bc;
    163 
    164 		if (!sampler)
    165 			continue;
    166 
    167 		bc = &sampler->border_color;
    168 
    169 		/*
    170 		 * XXX HACK ALERT XXX
    171 		 *
    172 		 * The border colors need to be swizzled in a particular
    173 		 * format-dependent order. Even though samplers don't know about
    174 		 * formats, we can assume that with a GL state tracker, there's a
    175 		 * 1:1 correspondence between sampler and texture. Take advantage
    176 		 * of that knowledge.
    177 		 */
    178 		if ((i >= tex->num_textures) || !tex->textures[i])
    179 			continue;
    180 
    181 		const struct util_format_description *desc =
    182 				util_format_description(tex->textures[i]->format);
    183 
    184 		e->rgb565 = 0;
    185 		e->rgb5a1 = 0;
    186 		e->rgba4 = 0;
    187 		e->rgb10a2 = 0;
    188 		e->z24 = 0;
    189 
    190 		for (j = 0; j < 4; j++) {
    191 			int c = desc->swizzle[j];
    192 
    193 			if (c >= 4)
    194 				continue;
    195 
    196 			if (desc->channel[c].pure_integer) {
    197 				uint16_t clamped;
    198 				switch (desc->channel[c].size) {
    199 				case 2:
    200 					assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
    201 					clamped = CLAMP(bc->ui[j], 0, 0x3);
    202 					break;
    203 				case 8:
    204 					if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
    205 						clamped = CLAMP(bc->i[j], -128, 127);
    206 					else
    207 						clamped = CLAMP(bc->ui[j], 0, 255);
    208 					break;
    209 				case 10:
    210 					assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
    211 					clamped = CLAMP(bc->ui[j], 0, 0x3ff);
    212 					break;
    213 				case 16:
    214 					if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
    215 						clamped = CLAMP(bc->i[j], -32768, 32767);
    216 					else
    217 						clamped = CLAMP(bc->ui[j], 0, 65535);
    218 					break;
    219 				default:
    220 					assert(!"Unexpected bit size");
    221 				case 32:
    222 					clamped = 0;
    223 					break;
    224 				}
    225 				e->fp32[c] = bc->ui[j];
    226 				e->fp16[c] = clamped;
    227 			} else {
    228 				float f = bc->f[j];
    229 				float f_u = CLAMP(f, 0, 1);
    230 				float f_s = CLAMP(f, -1, 1);
    231 
    232 				e->fp32[c] = fui(f);
    233 				e->fp16[c] = util_float_to_half(f);
    234 				e->ui16[c] = f_u * 0xffff;
    235 				e->si16[c] = f_s * 0x7fff;
    236 				e->ui8[c]  = f_u * 0xff;
    237 				e->si8[c]  = f_s * 0x7f;
    238 				if (c == 1)
    239 					e->rgb565 |= (int)(f_u * 0x3f) << 5;
    240 				else if (c < 3)
    241 					e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0);
    242 				if (c == 3)
    243 					e->rgb5a1 |= (f_u > 0.5) ? 0x8000 : 0;
    244 				else
    245 					e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5);
    246 				if (c == 3)
    247 					e->rgb10a2 |= (int)(f_u * 0x3) << 30;
    248 				else
    249 					e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10);
    250 				e->rgba4 |= (int)(f_u * 0xf) << (c * 4);
    251 				if (c == 0)
    252 					e->z24 = f_u * 0xffffff;
    253 			}
    254 		}
    255 
    256 #ifdef DEBUG
    257 		memset(&e->__pad0, 0, sizeof(e->__pad0));
    258 		memset(&e->__pad1, 0, sizeof(e->__pad1));
    259 #endif
    260 	}
    261 }
    262 
    263 static void
    264 emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
    265 {
    266 	struct fd5_context *fd5_ctx = fd5_context(ctx);
    267 	struct bcolor_entry *entries;
    268 	unsigned off;
    269 	void *ptr;
    270 
    271 	STATIC_ASSERT(sizeof(struct bcolor_entry) == FD5_BORDER_COLOR_SIZE);
    272 
    273 	u_upload_alloc(fd5_ctx->border_color_uploader,
    274 			0, FD5_BORDER_COLOR_UPLOAD_SIZE,
    275 			FD5_BORDER_COLOR_UPLOAD_SIZE, &off,
    276 			&fd5_ctx->border_color_buf,
    277 			&ptr);
    278 
    279 	entries = ptr;
    280 
    281 	setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]);
    282 	setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT],
    283 			&entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]);
    284 
    285 	OUT_PKT4(ring, REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
    286 	OUT_RELOC(ring, fd_resource(fd5_ctx->border_color_buf)->bo, off, 0, 0);
    287 
    288 	u_upload_unmap(fd5_ctx->border_color_uploader);
    289 }
    290 
    291 static bool
    292 emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
    293 		enum a4xx_state_block sb, struct fd_texture_stateobj *tex)
    294 {
    295 	bool needs_border = false;
    296 	unsigned bcolor_offset = (sb == SB4_FS_TEX) ? ctx->tex[PIPE_SHADER_VERTEX].num_samplers : 0;
    297 	unsigned i;
    298 
    299 	if (tex->num_samplers > 0) {
    300 		/* output sampler state: */
    301 		OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * tex->num_samplers));
    302 		OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
    303 				CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
    304 				CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
    305 				CP_LOAD_STATE4_0_NUM_UNIT(tex->num_samplers));
    306 		OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
    307 				CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
    308 		OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
    309 		for (i = 0; i < tex->num_samplers; i++) {
    310 			static const struct fd5_sampler_stateobj dummy_sampler = {};
    311 			const struct fd5_sampler_stateobj *sampler = tex->samplers[i] ?
    312 					fd5_sampler_stateobj(tex->samplers[i]) :
    313 					&dummy_sampler;
    314 			OUT_RING(ring, sampler->texsamp0);
    315 			OUT_RING(ring, sampler->texsamp1);
    316 			OUT_RING(ring, sampler->texsamp2 |
    317 					A5XX_TEX_SAMP_2_BCOLOR_OFFSET(bcolor_offset));
    318 			OUT_RING(ring, sampler->texsamp3);
    319 
    320 			needs_border |= sampler->needs_border;
    321 		}
    322 	}
    323 
    324 	if (tex->num_textures > 0) {
    325 		unsigned num_textures = tex->num_textures;
    326 
    327 		/* emit texture state: */
    328 		OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (12 * num_textures));
    329 		OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
    330 				CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
    331 				CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
    332 				CP_LOAD_STATE4_0_NUM_UNIT(num_textures));
    333 		OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
    334 				CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
    335 		OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
    336 		for (i = 0; i < tex->num_textures; i++) {
    337 			static const struct fd5_pipe_sampler_view dummy_view = {};
    338 			const struct fd5_pipe_sampler_view *view = tex->textures[i] ?
    339 					fd5_pipe_sampler_view(tex->textures[i]) :
    340 					&dummy_view;
    341 			enum a5xx_tile_mode tile_mode = TILE5_LINEAR;
    342 
    343 			if (view->base.texture)
    344 				tile_mode = fd_resource(view->base.texture)->tile_mode;
    345 
    346 			OUT_RING(ring, view->texconst0 |
    347 					A5XX_TEX_CONST_0_TILE_MODE(tile_mode));
    348 			OUT_RING(ring, view->texconst1);
    349 			OUT_RING(ring, view->texconst2);
    350 			OUT_RING(ring, view->texconst3);
    351 			if (view->base.texture) {
    352 				struct fd_resource *rsc = fd_resource(view->base.texture);
    353 				if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT)
    354 					rsc = rsc->stencil;
    355 				OUT_RELOC(ring, rsc->bo, view->offset,
    356 						(uint64_t)view->texconst5 << 32, 0);
    357 			} else {
    358 				OUT_RING(ring, 0x00000000);
    359 				OUT_RING(ring, view->texconst5);
    360 			}
    361 			OUT_RING(ring, view->texconst6);
    362 			OUT_RING(ring, view->texconst7);
    363 			OUT_RING(ring, view->texconst8);
    364 			OUT_RING(ring, view->texconst9);
    365 			OUT_RING(ring, view->texconst10);
    366 			OUT_RING(ring, view->texconst11);
    367 		}
    368 	}
    369 
    370 	return needs_border;
    371 }
    372 
    373 static void
    374 emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
    375 		enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
    376 {
    377 	unsigned count = util_last_bit(so->enabled_mask);
    378 
    379 	if (count == 0)
    380 		return;
    381 
    382 	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count));
    383 	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
    384 			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
    385 			CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
    386 			CP_LOAD_STATE4_0_NUM_UNIT(count));
    387 	OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
    388 			CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
    389 	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
    390 	for (unsigned i = 0; i < count; i++) {
    391 		OUT_RING(ring, 0x00000000);
    392 		OUT_RING(ring, 0x00000000);
    393 		OUT_RING(ring, 0x00000000);
    394 		OUT_RING(ring, 0x00000000);
    395 	}
    396 
    397 	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
    398 	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
    399 			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
    400 			CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
    401 			CP_LOAD_STATE4_0_NUM_UNIT(count));
    402 	OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
    403 			CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
    404 	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
    405 	for (unsigned i = 0; i < count; i++) {
    406 		struct pipe_shader_buffer *buf = &so->sb[i];
    407 		unsigned sz = buf->buffer_size;
    408 
    409 		/* width is in dwords, overflows into height: */
    410 		sz /= 4;
    411 
    412 		OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
    413 		OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
    414 	}
    415 
    416 	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
    417 	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
    418 			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
    419 			CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
    420 			CP_LOAD_STATE4_0_NUM_UNIT(count));
    421 	OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
    422 			CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
    423 	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
    424 	for (unsigned i = 0; i < count; i++) {
    425 		struct pipe_shader_buffer *buf = &so->sb[i];
    426 		if (buf->buffer) {
    427 			struct fd_resource *rsc = fd_resource(buf->buffer);
    428 			OUT_RELOCW(ring, rsc->bo, buf->buffer_offset, 0, 0);
    429 		} else {
    430 			OUT_RING(ring, 0x00000000);
    431 			OUT_RING(ring, 0x00000000);
    432 		}
    433 	}
    434 }
    435 
    436 void
    437 fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
    438 {
    439 	int32_t i, j;
    440 	const struct fd_vertex_state *vtx = emit->vtx;
    441 	const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
    442 
    443 	for (i = 0, j = 0; i <= vp->inputs_count; i++) {
    444 		if (vp->inputs[i].sysval)
    445 			continue;
    446 		if (vp->inputs[i].compmask) {
    447 			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
    448 			const struct pipe_vertex_buffer *vb =
    449 					&vtx->vertexbuf.vb[elem->vertex_buffer_index];
    450 			struct fd_resource *rsc = fd_resource(vb->buffer.resource);
    451 			enum pipe_format pfmt = elem->src_format;
    452 			enum a5xx_vtx_fmt fmt = fd5_pipe2vtx(pfmt);
    453 			bool isint = util_format_is_pure_integer(pfmt);
    454 			uint32_t off = vb->buffer_offset + elem->src_offset;
    455 			uint32_t size = fd_bo_size(rsc->bo) - off;
    456 			debug_assert(fmt != ~0);
    457 
    458 			OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4);
    459 			OUT_RELOC(ring, rsc->bo, off, 0, 0);
    460 			OUT_RING(ring, size);           /* VFD_FETCH[j].SIZE */
    461 			OUT_RING(ring, vb->stride);     /* VFD_FETCH[j].STRIDE */
    462 
    463 			OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2);
    464 			OUT_RING(ring, A5XX_VFD_DECODE_INSTR_IDX(j) |
    465 					A5XX_VFD_DECODE_INSTR_FORMAT(fmt) |
    466 					COND(elem->instance_divisor, A5XX_VFD_DECODE_INSTR_INSTANCED) |
    467 					A5XX_VFD_DECODE_INSTR_SWAP(fd5_pipe2swap(pfmt)) |
    468 					A5XX_VFD_DECODE_INSTR_UNK30 |
    469 					COND(!isint, A5XX_VFD_DECODE_INSTR_FLOAT));
    470 			OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */
    471 
    472 			OUT_PKT4(ring, REG_A5XX_VFD_DEST_CNTL(j), 1);
    473 			OUT_RING(ring, A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) |
    474 					A5XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid));
    475 
    476 			j++;
    477 		}
    478 	}
    479 
    480 	OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_0, 1);
    481 	OUT_RING(ring, A5XX_VFD_CONTROL_0_VTXCNT(j));
    482 }
    483 
    484 void
    485 fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
    486 		struct fd5_emit *emit)
    487 {
    488 	struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
    489 	const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
    490 	const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit);
    491 	const enum fd_dirty_3d_state dirty = emit->dirty;
    492 	bool needs_border = false;
    493 
    494 	emit_marker5(ring, 5);
    495 
    496 	if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
    497 		unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0};
    498 
    499 		for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
    500 			mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
    501 		}
    502 
    503 		OUT_PKT4(ring, REG_A5XX_RB_RENDER_COMPONENTS, 1);
    504 		OUT_RING(ring, A5XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
    505 				A5XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
    506 				A5XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
    507 				A5XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
    508 				A5XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
    509 				A5XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
    510 				A5XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
    511 				A5XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
    512 	}
    513 
    514 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
    515 		struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
    516 		uint32_t rb_alpha_control = zsa->rb_alpha_control;
    517 
    518 		if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
    519 			rb_alpha_control &= ~A5XX_RB_ALPHA_CONTROL_ALPHA_TEST;
    520 
    521 		OUT_PKT4(ring, REG_A5XX_RB_ALPHA_CONTROL, 1);
    522 		OUT_RING(ring, rb_alpha_control);
    523 
    524 		OUT_PKT4(ring, REG_A5XX_RB_STENCIL_CONTROL, 1);
    525 		OUT_RING(ring, zsa->rb_stencil_control);
    526 	}
    527 
    528 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) {
    529 		struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
    530 		struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
    531 
    532 		if (pfb->zsbuf) {
    533 			struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
    534 			uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
    535 
    536 			if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid)
    537 				gras_lrz_cntl = 0;
    538 			else if (emit->key.binning_pass && blend->lrz_write && zsa->lrz_write)
    539 				gras_lrz_cntl |= A5XX_GRAS_LRZ_CNTL_LRZ_WRITE;
    540 
    541 			OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
    542 			OUT_RING(ring, gras_lrz_cntl);
    543 		}
    544 	}
    545 
    546 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
    547 		struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
    548 		struct pipe_stencil_ref *sr = &ctx->stencil_ref;
    549 
    550 		OUT_PKT4(ring, REG_A5XX_RB_STENCILREFMASK, 2);
    551 		OUT_RING(ring, zsa->rb_stencilrefmask |
    552 				A5XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
    553 		OUT_RING(ring, zsa->rb_stencilrefmask_bf |
    554 				A5XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
    555 	}
    556 
    557 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
    558 		struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
    559 		bool fragz = fp->has_kill | fp->writes_pos;
    560 
    561 		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_CNTL, 1);
    562 		OUT_RING(ring, zsa->rb_depth_cntl);
    563 
    564 		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_PLANE_CNTL, 1);
    565 		OUT_RING(ring, COND(fragz, A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) |
    566 				COND(fragz && fp->frag_coord, A5XX_RB_DEPTH_PLANE_CNTL_UNK1));
    567 
    568 		OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);
    569 		OUT_RING(ring, COND(fragz, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) |
    570 				COND(fragz && fp->frag_coord, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1));
    571 	}
    572 
    573 	if (dirty & FD_DIRTY_SCISSOR) {
    574 		struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
    575 
    576 		OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
    577 		OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->minx) |
    578 				A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->miny));
    579 		OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) |
    580 				A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1));
    581 
    582 		OUT_PKT4(ring, REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
    583 		OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) |
    584 				A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny));
    585 		OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) |
    586 				A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1));
    587 
    588 		ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
    589 		ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
    590 		ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
    591 		ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
    592 	}
    593 
    594 	if (dirty & FD_DIRTY_VIEWPORT) {
    595 		fd_wfi(ctx->batch, ring);
    596 		OUT_PKT4(ring, REG_A5XX_GRAS_CL_VPORT_XOFFSET_0, 6);
    597 		OUT_RING(ring, A5XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
    598 		OUT_RING(ring, A5XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
    599 		OUT_RING(ring, A5XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
    600 		OUT_RING(ring, A5XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
    601 		OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
    602 		OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
    603 	}
    604 
    605 	if (dirty & FD_DIRTY_PROG)
    606 		fd5_program_emit(ctx, ring, emit);
    607 
    608 	if (dirty & FD_DIRTY_RASTERIZER) {
    609 		struct fd5_rasterizer_stateobj *rasterizer =
    610 				fd5_rasterizer_stateobj(ctx->rasterizer);
    611 
    612 		OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
    613 		OUT_RING(ring, rasterizer->gras_su_cntl);
    614 
    615 		OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
    616 		OUT_RING(ring, rasterizer->gras_su_point_minmax);
    617 		OUT_RING(ring, rasterizer->gras_su_point_size);
    618 
    619 		OUT_PKT4(ring, REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
    620 		OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
    621 		OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
    622 		OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp);
    623 
    624 		OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
    625 		OUT_RING(ring, rasterizer->pc_raster_cntl);
    626 
    627 		OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
    628 		OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
    629 	}
    630 
    631 	/* note: must come after program emit.. because there is some overlap
    632 	 * in registers, ex. PC_PRIMITIVE_CNTL and we rely on some cached
    633 	 * values from fd5_program_emit() to avoid having to re-emit the prog
    634 	 * every time rast state changes.
    635 	 *
    636 	 * Since the primitive restart state is not part of a tracked object, we
    637 	 * re-emit this register every time.
    638 	 */
    639 	if (emit->info && ctx->rasterizer) {
    640 		struct fd5_rasterizer_stateobj *rasterizer =
    641 				fd5_rasterizer_stateobj(ctx->rasterizer);
    642 		unsigned max_loc = fd5_context(ctx)->max_loc;
    643 
    644 		OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
    645 		OUT_RING(ring, rasterizer->pc_primitive_cntl |
    646 				 A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(max_loc) |
    647 				 COND(emit->info->primitive_restart && emit->info->index_size,
    648 					  A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART));
    649 	}
    650 
    651 	if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
    652 		uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
    653 		unsigned nr = pfb->nr_cbufs;
    654 
    655 		if (emit->key.binning_pass)
    656 			nr = 0;
    657 		else if (ctx->rasterizer->rasterizer_discard)
    658 			nr = 0;
    659 
    660 		OUT_PKT4(ring, REG_A5XX_RB_FS_OUTPUT_CNTL, 1);
    661 		OUT_RING(ring, A5XX_RB_FS_OUTPUT_CNTL_MRT(nr) |
    662 				COND(fp->writes_pos, A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z));
    663 
    664 		OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_CNTL, 1);
    665 		OUT_RING(ring, A5XX_SP_FS_OUTPUT_CNTL_MRT(nr) |
    666 				A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(posz_regid) |
    667 				A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(regid(63, 0)));
    668 	}
    669 
    670 	if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
    671 		ir3_emit_vs_consts(vp, ring, ctx, emit->info);
    672 		if (!emit->key.binning_pass)
    673 			ir3_emit_fs_consts(fp, ring, ctx);
    674 
    675 		struct pipe_stream_output_info *info = &vp->shader->stream_output;
    676 		if (info->num_outputs) {
    677 			struct fd_streamout_stateobj *so = &ctx->streamout;
    678 
    679 			for (unsigned i = 0; i < so->num_targets; i++) {
    680 				struct pipe_stream_output_target *target = so->targets[i];
    681 
    682 				if (!target)
    683 					continue;
    684 
    685 				unsigned offset = (so->offsets[i] * info->stride[i] * 4) +
    686 						target->buffer_offset;
    687 
    688 				OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(i), 3);
    689 				/* VPC_SO[i].BUFFER_BASE_LO: */
    690 				OUT_RELOCW(ring, fd_resource(target->buffer)->bo, 0, 0, 0);
    691 				OUT_RING(ring, target->buffer_size + offset);
    692 
    693 				OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(i), 3);
    694 				OUT_RING(ring, offset);
    695 				/* VPC_SO[i].FLUSH_BASE_LO/HI: */
    696 				// TODO just give hw a dummy addr for now.. we should
    697 				// be using this an then CP_MEM_TO_REG to set the
    698 				// VPC_SO[i].BUFFER_OFFSET for the next draw..
    699 				OUT_RELOCW(ring, fd5_context(ctx)->blit_mem, 0x100, 0, 0);
    700 
    701 				emit->streamout_mask |= (1 << i);
    702 			}
    703 		}
    704 	}
    705 
    706 	if ((dirty & FD_DIRTY_BLEND)) {
    707 		struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend);
    708 		uint32_t i;
    709 
    710 		for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
    711 			enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
    712 			bool is_int = util_format_is_pure_integer(format);
    713 			bool has_alpha = util_format_has_alpha(format);
    714 			uint32_t control = blend->rb_mrt[i].control;
    715 			uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
    716 
    717 			if (is_int) {
    718 				control &= A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
    719 				control |= A5XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
    720 			}
    721 
    722 			if (has_alpha) {
    723 				blend_control |= blend->rb_mrt[i].blend_control_rgb;
    724 			} else {
    725 				blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
    726 				control &= ~A5XX_RB_MRT_CONTROL_BLEND2;
    727 			}
    728 
    729 			OUT_PKT4(ring, REG_A5XX_RB_MRT_CONTROL(i), 1);
    730 			OUT_RING(ring, control);
    731 
    732 			OUT_PKT4(ring, REG_A5XX_RB_MRT_BLEND_CONTROL(i), 1);
    733 			OUT_RING(ring, blend_control);
    734 		}
    735 
    736 		OUT_PKT4(ring, REG_A5XX_RB_BLEND_CNTL, 1);
    737 		OUT_RING(ring, blend->rb_blend_cntl |
    738 				A5XX_RB_BLEND_CNTL_SAMPLE_MASK(0xffff));
    739 
    740 		OUT_PKT4(ring, REG_A5XX_SP_BLEND_CNTL, 1);
    741 		OUT_RING(ring, blend->sp_blend_cntl);
    742 	}
    743 
    744 	if (dirty & FD_DIRTY_BLEND_COLOR) {
    745 		struct pipe_blend_color *bcolor = &ctx->blend_color;
    746 
    747 		OUT_PKT4(ring, REG_A5XX_RB_BLEND_RED, 8);
    748 		OUT_RING(ring, A5XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
    749 				A5XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) |
    750 				A5XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f));
    751 		OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[0]));
    752 		OUT_RING(ring, A5XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
    753 				A5XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) |
    754 				A5XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f));
    755 		OUT_RING(ring, A5XX_RB_BLEND_RED_F32(bcolor->color[1]));
    756 		OUT_RING(ring, A5XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
    757 				A5XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) |
    758 				A5XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f));
    759 		OUT_RING(ring, A5XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
    760 		OUT_RING(ring, A5XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
    761 				A5XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) |
    762 				A5XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f));
    763 		OUT_RING(ring, A5XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
    764 	}
    765 
    766 	if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
    767 		needs_border |= emit_textures(ctx, ring, SB4_VS_TEX,
    768 				&ctx->tex[PIPE_SHADER_VERTEX]);
    769 		OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1);
    770 		OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures);
    771 	}
    772 
    773 	if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
    774 		needs_border |= emit_textures(ctx, ring, SB4_FS_TEX,
    775 				&ctx->tex[PIPE_SHADER_FRAGMENT]);
    776 	}
    777 
    778 	OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
    779 	OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask ?
    780 			~0 : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
    781 
    782 	OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
    783 	OUT_RING(ring, 0);
    784 
    785 	if (needs_border)
    786 		emit_border_color(ctx, ring);
    787 
    788 	if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
    789 		emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
    790 
    791 	if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
    792 		fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT);
    793 }
    794 
    795 void
    796 fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
    797 		struct ir3_shader_variant *cp)
    798 {
    799 	enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
    800 
    801 	if (dirty & FD_DIRTY_SHADER_TEX) {
    802 		bool needs_border = false;
    803 		needs_border |= emit_textures(ctx, ring, SB4_CS_TEX,
    804 				&ctx->tex[PIPE_SHADER_COMPUTE]);
    805 
    806 		if (needs_border)
    807 			emit_border_color(ctx, ring);
    808 
    809 		OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1);
    810 		OUT_RING(ring, 0);
    811 
    812 		OUT_PKT4(ring, REG_A5XX_TPL1_HS_TEX_COUNT, 1);
    813 		OUT_RING(ring, 0);
    814 
    815 		OUT_PKT4(ring, REG_A5XX_TPL1_DS_TEX_COUNT, 1);
    816 		OUT_RING(ring, 0);
    817 
    818 		OUT_PKT4(ring, REG_A5XX_TPL1_GS_TEX_COUNT, 1);
    819 		OUT_RING(ring, 0);
    820 
    821 		OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1);
    822 		OUT_RING(ring, 0);
    823 	}
    824 
    825 	OUT_PKT4(ring, REG_A5XX_TPL1_CS_TEX_COUNT, 1);
    826 	OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask ?
    827 			~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
    828 
    829 	if (dirty & FD_DIRTY_SHADER_SSBO)
    830 		emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]);
    831 
    832 	if (dirty & FD_DIRTY_SHADER_IMAGE)
    833 		fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
    834 }
    835 
    836 /* emit setup at begin of new cmdstream buffer (don't rely on previous
    837  * state, there could have been a context switch between ioctls):
    838  */
    839 void
    840 fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
    841 {
    842 	struct fd_context *ctx = batch->ctx;
    843 
    844 	fd5_set_render_mode(ctx, ring, BYPASS);
    845 	fd5_cache_flush(batch, ring);
    846 
    847 	OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
    848 	OUT_RING(ring, 0xfffff);
    849 
    850 /*
    851 t7              opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
    852 0000000500024048:               70d08003 00000000 001c5000 00000005
    853 t7              opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords)
    854 0000000500024058:               70d08003 00000010 001c7000 00000005
    855 
    856 t7              opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
    857 0000000500024068:               70268000
    858 */
    859 
    860 	OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1);
    861 	OUT_RING(ring, 0xffffffff);
    862 
    863 	OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
    864 	OUT_RING(ring, 0x00000012);
    865 
    866 	OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2);
    867 	OUT_RING(ring, A5XX_GRAS_SU_POINT_MINMAX_MIN(1.0) |
    868 			A5XX_GRAS_SU_POINT_MINMAX_MAX(4092.0));
    869 	OUT_RING(ring, A5XX_GRAS_SU_POINT_SIZE(0.5));
    870 
    871 	OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
    872 	OUT_RING(ring, 0x00000000);   /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
    873 
    874 	OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL, 1);
    875 	OUT_RING(ring, 0x00000000);   /* GRAS_SC_SCREEN_SCISSOR_CNTL */
    876 
    877 	OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG_MAX_CONST, 1);
    878 	OUT_RING(ring, 0);            /* SP_VS_CONFIG_MAX_CONST */
    879 
    880 	OUT_PKT4(ring, REG_A5XX_SP_FS_CONFIG_MAX_CONST, 1);
    881 	OUT_RING(ring, 0);            /* SP_FS_CONFIG_MAX_CONST */
    882 
    883 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E292, 2);
    884 	OUT_RING(ring, 0x00000000);   /* UNKNOWN_E292 */
    885 	OUT_RING(ring, 0x00000000);   /* UNKNOWN_E293 */
    886 
    887 	OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);
    888 	OUT_RING(ring, 0x00000044);   /* RB_MODE_CNTL */
    889 
    890 	OUT_PKT4(ring, REG_A5XX_RB_DBG_ECO_CNTL, 1);
    891 	OUT_RING(ring, 0x00100000);   /* RB_DBG_ECO_CNTL */
    892 
    893 	OUT_PKT4(ring, REG_A5XX_VFD_MODE_CNTL, 1);
    894 	OUT_RING(ring, 0x00000000);   /* VFD_MODE_CNTL */
    895 
    896 	OUT_PKT4(ring, REG_A5XX_PC_MODE_CNTL, 1);
    897 	OUT_RING(ring, 0x0000001f);   /* PC_MODE_CNTL */
    898 
    899 	OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
    900 	OUT_RING(ring, 0x0000001e);   /* SP_MODE_CNTL */
    901 
    902 	OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
    903 	OUT_RING(ring, 0x40000800);   /* SP_DBG_ECO_CNTL */
    904 
    905 	OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
    906 	OUT_RING(ring, 0x00000544);   /* TPL1_MODE_CNTL */
    907 
    908 	OUT_PKT4(ring, REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0, 2);
    909 	OUT_RING(ring, 0x00000080);   /* HLSQ_TIMEOUT_THRESHOLD_0 */
    910 	OUT_RING(ring, 0x00000000);   /* HLSQ_TIMEOUT_THRESHOLD_1 */
    911 
    912 	OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
    913 	OUT_RING(ring, 0x00000400);   /* VPC_DBG_ECO_CNTL */
    914 
    915 	OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);
    916 	OUT_RING(ring, 0x00000001);   /* HLSQ_MODE_CNTL */
    917 
    918 	OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
    919 	OUT_RING(ring, 0x00000000);   /* VPC_MODE_CNTL */
    920 
    921 	/* we don't use this yet.. probably best to disable.. */
    922 	OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
    923 	OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
    924 			CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
    925 			CP_SET_DRAW_STATE__0_GROUP_ID(0));
    926 	OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
    927 	OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
    928 
    929 	OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1);
    930 	OUT_RING(ring, 0x00000000);   /* GRAS_SU_CONSERVATIVE_RAS_CNTL */
    931 
    932 	OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
    933 	OUT_RING(ring, 0x00000000);   /* GRAS_SC_BIN_CNTL */
    934 
    935 	OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1);
    936 	OUT_RING(ring, 0x00000000);   /* GRAS_SC_BIN_CNTL */
    937 
    938 	OUT_PKT4(ring, REG_A5XX_VPC_FS_PRIMITIVEID_CNTL, 1);
    939 	OUT_RING(ring, 0x000000ff);   /* VPC_FS_PRIMITIVEID_CNTL */
    940 
    941 	OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
    942 	OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);
    943 
    944 	OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(0), 3);
    945 	OUT_RING(ring, 0x00000000);   /* VPC_SO_BUFFER_BASE_LO_0 */
    946 	OUT_RING(ring, 0x00000000);   /* VPC_SO_BUFFER_BASE_HI_0 */
    947 	OUT_RING(ring, 0x00000000);   /* VPC_SO_BUFFER_SIZE_0 */
    948 
    949 	OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO(0), 2);
    950 	OUT_RING(ring, 0x00000000);   /* VPC_SO_FLUSH_BASE_LO_0 */
    951 	OUT_RING(ring, 0x00000000);   /* VPC_SO_FLUSH_BASE_HI_0 */
    952 
    953 	OUT_PKT4(ring, REG_A5XX_PC_GS_PARAM, 1);
    954 	OUT_RING(ring, 0x00000000);   /* PC_GS_PARAM */
    955 
    956 	OUT_PKT4(ring, REG_A5XX_PC_HS_PARAM, 1);
    957 	OUT_RING(ring, 0x00000000);   /* PC_HS_PARAM */
    958 
    959 	OUT_PKT4(ring, REG_A5XX_TPL1_TP_FS_ROTATION_CNTL, 1);
    960 	OUT_RING(ring, 0x00000000);   /* TPL1_TP_FS_ROTATION_CNTL */
    961 
    962 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E001, 1);
    963 	OUT_RING(ring, 0x00000000);   /* UNKNOWN_E001 */
    964 
    965 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E004, 1);
    966 	OUT_RING(ring, 0x00000000);   /* UNKNOWN_E004 */
    967 
    968 	OUT_PKT4(ring, REG_A5XX_GRAS_SU_LAYERED, 1);
    969 	OUT_RING(ring, 0x00000000);   /* GRAS_SU_LAYERED */
    970 
    971 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E29A, 1);
    972 	OUT_RING(ring, 0x00ffff00);   /* UNKNOWN_E29A */
    973 
    974 	OUT_PKT4(ring, REG_A5XX_VPC_SO_BUF_CNTL, 1);
    975 	OUT_RING(ring, 0x00000000);   /* VPC_SO_BUF_CNTL */
    976 
    977 	OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(0), 1);
    978 	OUT_RING(ring, 0x00000000);   /* UNKNOWN_E2AB */
    979 
    980 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E389, 1);
    981 	OUT_RING(ring, 0x00000000);   /* UNKNOWN_E389 */
    982 
    983 	OUT_PKT4(ring, REG_A5XX_PC_GS_LAYERED, 1);
    984 	OUT_RING(ring, 0x00000000);   /* PC_GS_LAYERED */
    985 
    986 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5AB, 1);
    987 	OUT_RING(ring, 0x00000000);   /* UNKNOWN_E5AB */
    988 
    989 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5C2, 1);
    990 	OUT_RING(ring, 0x00000000);   /* UNKNOWN_E5C2 */
    991 
    992 	OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO(1), 3);
    993 	OUT_RING(ring, 0x00000000);
    994 	OUT_RING(ring, 0x00000000);
    995 	OUT_RING(ring, 0x00000000);
    996 
    997 	OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(1), 6);
    998 	OUT_RING(ring, 0x00000000);
    999 	OUT_RING(ring, 0x00000000);
   1000 	OUT_RING(ring, 0x00000000);
   1001 	OUT_RING(ring, 0x00000000);
   1002 	OUT_RING(ring, 0x00000000);
   1003 	OUT_RING(ring, 0x00000000);
   1004 
   1005 	OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(2), 6);
   1006 	OUT_RING(ring, 0x00000000);
   1007 	OUT_RING(ring, 0x00000000);
   1008 	OUT_RING(ring, 0x00000000);
   1009 	OUT_RING(ring, 0x00000000);
   1010 	OUT_RING(ring, 0x00000000);
   1011 	OUT_RING(ring, 0x00000000);
   1012 
   1013 	OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_OFFSET(3), 3);
   1014 	OUT_RING(ring, 0x00000000);
   1015 	OUT_RING(ring, 0x00000000);
   1016 	OUT_RING(ring, 0x00000000);
   1017 
   1018 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5DB, 1);
   1019 	OUT_RING(ring, 0x00000000);
   1020 
   1021 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E600, 1);
   1022 	OUT_RING(ring, 0x00000000);
   1023 
   1024 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E640, 1);
   1025 	OUT_RING(ring, 0x00000000);
   1026 
   1027 	OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 4);
   1028 	OUT_RING(ring, 0x00000000);
   1029 	OUT_RING(ring, 0x00000000);
   1030 	OUT_RING(ring, 0x00000000);
   1031 	OUT_RING(ring, 0x00000000);
   1032 
   1033 	OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 2);
   1034 	OUT_RING(ring, 0x00000000);
   1035 	OUT_RING(ring, 0x00000000);
   1036 
   1037 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C0, 3);
   1038 	OUT_RING(ring, 0x00000000);
   1039 	OUT_RING(ring, 0x00000000);
   1040 	OUT_RING(ring, 0x00000000);
   1041 
   1042 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C5, 3);
   1043 	OUT_RING(ring, 0x00000000);
   1044 	OUT_RING(ring, 0x00000000);
   1045 	OUT_RING(ring, 0x00000000);
   1046 
   1047 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CA, 3);
   1048 	OUT_RING(ring, 0x00000000);
   1049 	OUT_RING(ring, 0x00000000);
   1050 	OUT_RING(ring, 0x00000000);
   1051 
   1052 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CF, 3);
   1053 	OUT_RING(ring, 0x00000000);
   1054 	OUT_RING(ring, 0x00000000);
   1055 	OUT_RING(ring, 0x00000000);
   1056 
   1057 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D4, 3);
   1058 	OUT_RING(ring, 0x00000000);
   1059 	OUT_RING(ring, 0x00000000);
   1060 	OUT_RING(ring, 0x00000000);
   1061 
   1062 	OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D9, 3);
   1063 	OUT_RING(ring, 0x00000000);
   1064 	OUT_RING(ring, 0x00000000);
   1065 	OUT_RING(ring, 0x00000000);
   1066 
   1067 	OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   1068 	OUT_RING(ring, 0x00000000);
   1069 }
   1070 
   /*
    * Thin forwarder: hands (ring, target) straight to __OUT_IB5().
    * Registered as the context's emit_ib hook by fd5_emit_init().
    *
    *   ring    ringbuffer the packet is written into
    *   target  ringbuffer passed through to __OUT_IB5()
    */
   static void
   fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
   {
   	__OUT_IB5(ring, target);
   }
   1076 
   1077 static void
   1078 fd5_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
   1079 		unsigned dst_off, struct pipe_resource *src, unsigned src_off,
   1080 		unsigned sizedwords)
   1081 {
   1082 	struct fd_bo *src_bo = fd_resource(src)->bo;
   1083 	struct fd_bo *dst_bo = fd_resource(dst)->bo;
   1084 	unsigned i;
   1085 
   1086 	for (i = 0; i < sizedwords; i++) {
   1087 		OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
   1088 		OUT_RING(ring, 0x00000000);
   1089 		OUT_RELOCW(ring, dst_bo, dst_off, 0, 0);
   1090 		OUT_RELOC (ring, src_bo, src_off, 0, 0);
   1091 
   1092 		dst_off += 4;
   1093 		src_off += 4;
   1094 	}
   1095 }
   1096 
   1097 void
   1098 fd5_emit_init(struct pipe_context *pctx)
   1099 {
   1100 	struct fd_context *ctx = fd_context(pctx);
   1101 	ctx->emit_const = fd5_emit_const;
   1102 	ctx->emit_const_bo = fd5_emit_const_bo;
   1103 	ctx->emit_ib = fd5_emit_ib;
   1104 	ctx->mem_to_mem = fd5_mem_to_mem;
   1105 }
   1106