1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ 2 3 /* 4 * Copyright (C) 2013 Rob Clark <robclark (at) freedesktop.org> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 * 25 * Authors: 26 * Rob Clark <robclark (at) freedesktop.org> 27 */ 28 29 #include "pipe/p_state.h" 30 #include "util/u_string.h" 31 #include "util/u_memory.h" 32 #include "util/u_helpers.h" 33 #include "util/u_format.h" 34 #include "util/u_viewport.h" 35 36 #include "freedreno_resource.h" 37 #include "freedreno_query_hw.h" 38 39 #include "fd3_emit.h" 40 #include "fd3_blend.h" 41 #include "fd3_context.h" 42 #include "fd3_program.h" 43 #include "fd3_rasterizer.h" 44 #include "fd3_texture.h" 45 #include "fd3_format.h" 46 #include "fd3_zsa.h" 47 48 static const enum adreno_state_block sb[] = { 49 [SHADER_VERTEX] = SB_VERT_SHADER, 50 [SHADER_FRAGMENT] = SB_FRAG_SHADER, 51 }; 52 53 /* regid: base const register 54 * prsc or dwords: buffer containing constant values 55 * sizedwords: size of const value buffer 56 */ 57 static void 58 fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, 59 uint32_t regid, uint32_t offset, uint32_t sizedwords, 60 const uint32_t *dwords, struct pipe_resource *prsc) 61 { 62 uint32_t i, sz; 63 enum adreno_state_src src; 64 65 debug_assert((regid % 4) == 0); 66 debug_assert((sizedwords % 4) == 0); 67 68 if (prsc) { 69 sz = 0; 70 src = SS_INDIRECT; 71 } else { 72 sz = sizedwords; 73 src = SS_DIRECT; 74 } 75 76 OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); 77 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | 78 CP_LOAD_STATE_0_STATE_SRC(src) | 79 CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | 80 CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2)); 81 if (prsc) { 82 struct fd_bo *bo = fd_resource(prsc)->bo; 83 OUT_RELOC(ring, bo, offset, 84 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); 85 } else { 86 OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | 87 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); 88 dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; 89 } 90 for (i = 0; i < sz; i++) { 91 OUT_RING(ring, dwords[i]); 92 } 93 } 94 95 static void 96 fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, 97 uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) 98 { 99 uint32_t anum = align(num, 4); 100 uint32_t i; 101 102 debug_assert((regid % 4) == 0); 103 104 OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum); 105 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | 106 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | 107 CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | 108 CP_LOAD_STATE_0_NUM_UNIT(anum/2)); 109 OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | 110 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); 111 112 for (i = 0; i < num; i++) { 113 if (prscs[i]) { 114 if (write) { 115 OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); 116 } else { 117 OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); 118 } 119 } else { 120 OUT_RING(ring, 0xbad00000 | (i << 16)); 121 } 122 } 123 124 for (; i < anum; i++) 125 OUT_RING(ring, 0xffffffff); 126 } 127 128 #define VERT_TEX_OFF 0 129 #define FRAG_TEX_OFF 16 130 #define BASETABLE_SZ A3XX_MAX_MIP_LEVELS 131 132 static void 133 emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, 134 enum adreno_state_block sb, struct fd_texture_stateobj *tex) 135 { 136 static const unsigned tex_off[] = { 137 [SB_VERT_TEX] = VERT_TEX_OFF, 138 [SB_FRAG_TEX] = FRAG_TEX_OFF, 139 }; 140 static const enum adreno_state_block mipaddr[] = { 141 [SB_VERT_TEX] = SB_VERT_MIPADDR, 142 [SB_FRAG_TEX] = SB_FRAG_MIPADDR, 143 }; 144 static const uint32_t bcolor_reg[] = { 145 [SB_VERT_TEX] = REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, 146 [SB_FRAG_TEX] = REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, 147 }; 148 struct fd3_context *fd3_ctx = fd3_context(ctx); 149 bool needs_border = false; 150 unsigned i, j; 151 152 if (tex->num_samplers > 0) { 153 /* output sampler state: */ 154 OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers)); 155 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) | 156 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | 157 CP_LOAD_STATE_0_STATE_BLOCK(sb) | 158 CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers)); 159 OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | 160 CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); 161 for (i = 0; i < tex->num_samplers; i++) { 162 static const struct fd3_sampler_stateobj dummy_sampler = {}; 163 const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ? 164 fd3_sampler_stateobj(tex->samplers[i]) : 165 &dummy_sampler; 166 167 OUT_RING(ring, sampler->texsamp0); 168 OUT_RING(ring, sampler->texsamp1); 169 170 needs_border |= sampler->needs_border; 171 } 172 } 173 174 if (tex->num_textures > 0) { 175 /* emit texture state: */ 176 OUT_PKT3(ring, CP_LOAD_STATE, 2 + (4 * tex->num_textures)); 177 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) | 178 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | 179 CP_LOAD_STATE_0_STATE_BLOCK(sb) | 180 CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures)); 181 OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | 182 CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); 183 for (i = 0; i < tex->num_textures; i++) { 184 static const struct fd3_pipe_sampler_view dummy_view = {}; 185 const struct fd3_pipe_sampler_view *view = tex->textures[i] ? 186 fd3_pipe_sampler_view(tex->textures[i]) : 187 &dummy_view; 188 OUT_RING(ring, view->texconst0); 189 OUT_RING(ring, view->texconst1); 190 OUT_RING(ring, view->texconst2 | 191 A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i)); 192 OUT_RING(ring, view->texconst3); 193 } 194 195 /* emit mipaddrs: */ 196 OUT_PKT3(ring, CP_LOAD_STATE, 2 + (BASETABLE_SZ * tex->num_textures)); 197 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * tex_off[sb]) | 198 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | 199 CP_LOAD_STATE_0_STATE_BLOCK(mipaddr[sb]) | 200 CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * tex->num_textures)); 201 OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | 202 CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); 203 for (i = 0; i < tex->num_textures; i++) { 204 static const struct fd3_pipe_sampler_view dummy_view = { 205 .base.target = PIPE_TEXTURE_1D, /* anything !PIPE_BUFFER */ 206 .base.u.tex.first_level = 1, 207 }; 208 const struct fd3_pipe_sampler_view *view = tex->textures[i] ? 209 fd3_pipe_sampler_view(tex->textures[i]) : 210 &dummy_view; 211 struct fd_resource *rsc = fd_resource(view->base.texture); 212 if (rsc && rsc->base.target == PIPE_BUFFER) { 213 OUT_RELOC(ring, rsc->bo, view->base.u.buf.offset, 0, 0); 214 j = 1; 215 } else { 216 unsigned start = fd_sampler_first_level(&view->base); 217 unsigned end = fd_sampler_last_level(&view->base); 218 219 for (j = 0; j < (end - start + 1); j++) { 220 struct fd_resource_slice *slice = 221 fd_resource_slice(rsc, j + start); 222 OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0); 223 } 224 } 225 226 /* pad the remaining entries w/ null: */ 227 for (; j < BASETABLE_SZ; j++) { 228 OUT_RING(ring, 0x00000000); 229 } 230 } 231 } 232 233 if (needs_border) { 234 unsigned off; 235 void *ptr; 236 237 u_upload_alloc(fd3_ctx->border_color_uploader, 238 0, BORDER_COLOR_UPLOAD_SIZE, 239 BORDER_COLOR_UPLOAD_SIZE, &off, 240 &fd3_ctx->border_color_buf, 241 &ptr); 242 243 fd_setup_border_colors(tex, ptr, tex_off[sb]); 244 245 OUT_PKT0(ring, bcolor_reg[sb], 1); 246 OUT_RELOC(ring, fd_resource(fd3_ctx->border_color_buf)->bo, off, 0, 0); 247 248 u_upload_unmap(fd3_ctx->border_color_uploader); 249 } 250 } 251 252 /* emit texture state for mem->gmem restore operation.. eventually it would 253 * be good to get rid of this and use normal CSO/etc state for more of these 254 * special cases, but for now the compiler is not sufficient.. 255 * 256 * Also, for using normal state, not quite sure how to handle the special 257 * case format (fd3_gmem_restore_format()) stuff for restoring depth/stencil. 258 */ 259 void 260 fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, 261 struct pipe_surface **psurf, 262 int bufs) 263 { 264 int i, j; 265 266 /* output sampler state: */ 267 OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs); 268 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) | 269 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | 270 CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | 271 CP_LOAD_STATE_0_NUM_UNIT(bufs)); 272 OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | 273 CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); 274 for (i = 0; i < bufs; i++) { 275 OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) | 276 A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) | 277 A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) | 278 A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) | 279 A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT)); 280 OUT_RING(ring, 0x00000000); 281 } 282 283 /* emit texture state: */ 284 OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs); 285 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) | 286 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | 287 CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | 288 CP_LOAD_STATE_0_NUM_UNIT(bufs)); 289 OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | 290 CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); 291 for (i = 0; i < bufs; i++) { 292 if (!psurf[i]) { 293 OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) | 294 A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) | 295 A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) | 296 A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) | 297 A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE)); 298 OUT_RING(ring, 0x00000000); 299 OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i)); 300 OUT_RING(ring, 0x00000000); 301 continue; 302 } 303 304 struct fd_resource *rsc = fd_resource(psurf[i]->texture); 305 enum pipe_format format = fd_gmem_restore_format(psurf[i]->format); 306 /* The restore blit_zs shader expects stencil in sampler 0, and depth 307 * in sampler 1 308 */ 309 if (rsc->stencil && i == 0) { 310 rsc = rsc->stencil; 311 format = fd_gmem_restore_format(rsc->base.format); 312 } 313 314 /* note: PIPE_BUFFER disallowed for surfaces */ 315 unsigned lvl = psurf[i]->u.tex.level; 316 struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); 317 318 debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer); 319 320 OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) | 321 A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) | 322 fd3_tex_swiz(format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, 323 PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W)); 324 OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) | 325 A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) | 326 A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height)); 327 OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | 328 A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i)); 329 OUT_RING(ring, 0x00000000); 330 } 331 332 /* emit mipaddrs: */ 333 OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs); 334 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) | 335 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | 336 CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) | 337 CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs)); 338 OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | 339 CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); 340 for (i = 0; i < bufs; i++) { 341 if (psurf[i]) { 342 struct fd_resource *rsc = fd_resource(psurf[i]->texture); 343 /* Matches above logic for blit_zs shader */ 344 if (rsc->stencil && i == 0) 345 rsc = rsc->stencil; 346 unsigned lvl = psurf[i]->u.tex.level; 347 uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer); 348 OUT_RELOC(ring, rsc->bo, offset, 0, 0); 349 } else { 350 OUT_RING(ring, 0x00000000); 351 } 352 353 /* pad the remaining entries w/ null: */ 354 for (j = 1; j < BASETABLE_SZ; j++) { 355 OUT_RING(ring, 0x00000000); 356 } 357 } 358 } 359 360 void 361 fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) 362 { 363 int32_t i, j, last = -1; 364 uint32_t total_in = 0; 365 const struct fd_vertex_state *vtx = emit->vtx; 366 const struct ir3_shader_variant *vp = fd3_emit_get_vp(emit); 367 unsigned vertex_regid = regid(63, 0); 368 unsigned instance_regid = regid(63, 0); 369 unsigned vtxcnt_regid = regid(63, 0); 370 371 /* Note that sysvals come *after* normal inputs: */ 372 for (i = 0; i < vp->inputs_count; i++) { 373 if (!vp->inputs[i].compmask) 374 continue; 375 if (vp->inputs[i].sysval) { 376 switch(vp->inputs[i].slot) { 377 case SYSTEM_VALUE_BASE_VERTEX: 378 /* handled elsewhere */ 379 break; 380 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: 381 vertex_regid = vp->inputs[i].regid; 382 break; 383 case SYSTEM_VALUE_INSTANCE_ID: 384 instance_regid = vp->inputs[i].regid; 385 break; 386 case SYSTEM_VALUE_VERTEX_CNT: 387 vtxcnt_regid = vp->inputs[i].regid; 388 break; 389 default: 390 unreachable("invalid system value"); 391 break; 392 } 393 } else if (i < vtx->vtx->num_elements) { 394 last = i; 395 } 396 } 397 398 for (i = 0, j = 0; i <= last; i++) { 399 assert(!vp->inputs[i].sysval); 400 if (vp->inputs[i].compmask) { 401 struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; 402 const struct pipe_vertex_buffer *vb = 403 &vtx->vertexbuf.vb[elem->vertex_buffer_index]; 404 struct fd_resource *rsc = fd_resource(vb->buffer.resource); 405 enum pipe_format pfmt = elem->src_format; 406 enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt); 407 bool switchnext = (i != last) || 408 (vertex_regid != regid(63, 0)) || 409 (instance_regid != regid(63, 0)) || 410 (vtxcnt_regid != regid(63, 0)); 411 bool isint = util_format_is_pure_integer(pfmt); 412 uint32_t fs = util_format_get_blocksize(pfmt); 413 414 debug_assert(fmt != ~0); 415 416 OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2); 417 OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) | 418 A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) | 419 COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | 420 A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) | 421 COND(elem->instance_divisor, A3XX_VFD_FETCH_INSTR_0_INSTANCED) | 422 A3XX_VFD_FETCH_INSTR_0_STEPRATE(MAX2(1, elem->instance_divisor))); 423 OUT_RELOC(ring, rsc->bo, vb->buffer_offset + elem->src_offset, 0, 0); 424 425 OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1); 426 OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | 427 A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) | 428 A3XX_VFD_DECODE_INSTR_FORMAT(fmt) | 429 A3XX_VFD_DECODE_INSTR_SWAP(fd3_pipe2swap(pfmt)) | 430 A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) | 431 A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) | 432 A3XX_VFD_DECODE_INSTR_LASTCOMPVALID | 433 COND(isint, A3XX_VFD_DECODE_INSTR_INT) | 434 COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT)); 435 436 total_in += vp->inputs[i].ncomp; 437 j++; 438 } 439 } 440 441 /* hw doesn't like to be configured for zero vbo's, it seems: */ 442 if (last < 0) { 443 /* just recycle the shader bo, we just need to point to *something* 444 * valid: 445 */ 446 struct fd_bo *dummy_vbo = vp->bo; 447 bool switchnext = (vertex_regid != regid(63, 0)) || 448 (instance_regid != regid(63, 0)) || 449 (vtxcnt_regid != regid(63, 0)); 450 451 OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2); 452 OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) | 453 A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) | 454 COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | 455 A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) | 456 A3XX_VFD_FETCH_INSTR_0_STEPRATE(1)); 457 OUT_RELOC(ring, dummy_vbo, 0, 0, 0); 458 459 OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1); 460 OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | 461 A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) | 462 A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) | 463 A3XX_VFD_DECODE_INSTR_SWAP(XYZW) | 464 A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) | 465 A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) | 466 A3XX_VFD_DECODE_INSTR_LASTCOMPVALID | 467 COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT)); 468 469 total_in = 1; 470 j = 1; 471 } 472 473 OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2); 474 OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) | 475 A3XX_VFD_CONTROL_0_PACKETSIZE(2) | 476 A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) | 477 A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j)); 478 OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX 479 A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) | 480 A3XX_VFD_CONTROL_1_REGID4INST(instance_regid)); 481 482 OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); 483 OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) | 484 A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(vtxcnt_regid)); 485 } 486 487 void 488 fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, 489 struct fd3_emit *emit) 490 { 491 const struct ir3_shader_variant *vp = fd3_emit_get_vp(emit); 492 const struct ir3_shader_variant *fp = fd3_emit_get_fp(emit); 493 const enum fd_dirty_3d_state dirty = emit->dirty; 494 495 emit_marker(ring, 5); 496 497 if (dirty & FD_DIRTY_SAMPLE_MASK) { 498 OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1); 499 OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE | 500 A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) | 501 A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask)); 502 } 503 504 if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL)) && 505 !emit->key.binning_pass) { 506 uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control | 507 fd3_blend_stateobj(ctx->blend)->rb_render_control; 508 509 val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS); 510 val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD | 511 A3XX_RB_RENDER_CONTROL_YCOORD | 512 A3XX_RB_RENDER_CONTROL_ZCOORD | 513 A3XX_RB_RENDER_CONTROL_WCOORD); 514 515 /* I suppose if we needed to (which I don't *think* we need 516 * to), we could emit this for binning pass too. But we 517 * would need to keep a different patch-list for binning 518 * vs render pass. 519 */ 520 521 OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); 522 OUT_RINGP(ring, val, &ctx->batch->rbrc_patches); 523 } 524 525 if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { 526 struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa); 527 struct pipe_stencil_ref *sr = &ctx->stencil_ref; 528 529 OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1); 530 OUT_RING(ring, zsa->rb_alpha_ref); 531 532 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); 533 OUT_RING(ring, zsa->rb_stencil_control); 534 535 OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2); 536 OUT_RING(ring, zsa->rb_stencilrefmask | 537 A3XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0])); 538 OUT_RING(ring, zsa->rb_stencilrefmask_bf | 539 A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1])); 540 } 541 542 if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { 543 uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control; 544 if (fp->writes_pos) { 545 val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z; 546 val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; 547 } 548 if (fp->has_kill) { 549 val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; 550 } 551 if (!ctx->rasterizer->depth_clip) { 552 val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE; 553 } 554 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); 555 OUT_RING(ring, val); 556 } 557 558 if (dirty & FD_DIRTY_RASTERIZER) { 559 struct fd3_rasterizer_stateobj *rasterizer = 560 fd3_rasterizer_stateobj(ctx->rasterizer); 561 562 OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); 563 OUT_RING(ring, rasterizer->gras_su_mode_control); 564 565 OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2); 566 OUT_RING(ring, rasterizer->gras_su_point_minmax); 567 OUT_RING(ring, rasterizer->gras_su_point_size); 568 569 OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2); 570 OUT_RING(ring, rasterizer->gras_su_poly_offset_scale); 571 OUT_RING(ring, rasterizer->gras_su_poly_offset_offset); 572 } 573 574 if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { 575 uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer) 576 ->gras_cl_clip_cntl; 577 uint8_t planes = ctx->rasterizer->clip_plane_enable; 578 val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE); 579 val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD | 580 A3XX_GRAS_CL_CLIP_CNTL_WCOORD); 581 if (!emit->key.ucp_enables) 582 val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES( 583 MIN2(util_bitcount(planes), 6)); 584 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 585 OUT_RING(ring, val); 586 } 587 588 if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) { 589 uint32_t planes = ctx->rasterizer->clip_plane_enable; 590 int count = 0; 591 592 if (emit->key.ucp_enables) 593 planes = 0; 594 595 while (planes && count < 6) { 596 int i = ffs(planes) - 1; 597 598 planes &= ~(1U << i); 599 fd_wfi(ctx->batch, ring); 600 OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4); 601 OUT_RING(ring, fui(ctx->ucp.ucp[i][0])); 602 OUT_RING(ring, fui(ctx->ucp.ucp[i][1])); 603 OUT_RING(ring, fui(ctx->ucp.ucp[i][2])); 604 OUT_RING(ring, fui(ctx->ucp.ucp[i][3])); 605 } 606 } 607 608 /* NOTE: since primitive_restart is not actually part of any 609 * state object, we need to make sure that we always emit 610 * PRIM_VTX_CNTL.. either that or be more clever and detect 611 * when it changes. 612 */ 613 if (emit->info) { 614 const struct pipe_draw_info *info = emit->info; 615 uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer) 616 ->pc_prim_vtx_cntl; 617 618 if (!emit->key.binning_pass) { 619 uint32_t stride_in_vpc = align(fp->total_in, 4) / 4; 620 if (stride_in_vpc > 0) 621 stride_in_vpc = MAX2(stride_in_vpc, 2); 622 val |= A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc); 623 } 624 625 if (info->index_size && info->primitive_restart) { 626 val |= A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; 627 } 628 629 val |= COND(vp->writes_psize, A3XX_PC_PRIM_VTX_CNTL_PSIZE); 630 631 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); 632 OUT_RING(ring, val); 633 } 634 635 if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER | FD_DIRTY_VIEWPORT)) { 636 struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); 637 int minx = scissor->minx; 638 int miny = scissor->miny; 639 int maxx = scissor->maxx; 640 int maxy = scissor->maxy; 641 642 /* Unfortunately there is no separate depth clip disable, only an all 643 * or nothing deal. So when we disable clipping, we must handle the 644 * viewport clip via scissors. 645 */ 646 if (!ctx->rasterizer->depth_clip) { 647 struct pipe_viewport_state *vp = &ctx->viewport; 648 minx = MAX2(minx, (int)floorf(vp->translate[0] - fabsf(vp->scale[0]))); 649 miny = MAX2(miny, (int)floorf(vp->translate[1] - fabsf(vp->scale[1]))); 650 maxx = MIN2(maxx, (int)ceilf(vp->translate[0] + fabsf(vp->scale[0]))); 651 maxy = MIN2(maxy, (int)ceilf(vp->translate[1] + fabsf(vp->scale[1]))); 652 } 653 654 OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); 655 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(minx) | 656 A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(miny)); 657 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(maxx - 1) | 658 A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(maxy - 1)); 659 660 ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, minx); 661 ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, miny); 662 ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, maxx); 663 ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, maxy); 664 } 665 666 if (dirty & FD_DIRTY_VIEWPORT) { 667 fd_wfi(ctx->batch, ring); 668 OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); 669 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5)); 670 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0])); 671 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(ctx->viewport.translate[1] - 0.5)); 672 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(ctx->viewport.scale[1])); 673 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(ctx->viewport.translate[2])); 674 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2])); 675 } 676 677 if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) { 678 float zmin, zmax; 679 int depth = 24; 680 if (ctx->batch->framebuffer.zsbuf) { 681 depth = util_format_get_component_bits( 682 pipe_surface_format(ctx->batch->framebuffer.zsbuf), 683 UTIL_FORMAT_COLORSPACE_ZS, 0); 684 } 685 util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz, 686 &zmin, &zmax); 687 688 OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2); 689 if (depth == 32) { 690 OUT_RING(ring, (uint32_t)(zmin * 0xffffffff)); 691 OUT_RING(ring, (uint32_t)(zmax * 0xffffffff)); 692 } else if (depth == 16) { 693 OUT_RING(ring, (uint32_t)(zmin * 0xffff)); 694 OUT_RING(ring, (uint32_t)(zmax * 0xffff)); 695 } else { 696 OUT_RING(ring, (uint32_t)(zmin * 0xffffff)); 697 OUT_RING(ring, (uint32_t)(zmax * 0xffffff)); 698 } 699 } 700 701 if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) { 702 struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; 703 int nr_cbufs = pfb->nr_cbufs; 704 if (fd3_blend_stateobj(ctx->blend)->rb_render_control & 705 A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE) 706 nr_cbufs++; 707 fd3_program_emit(ring, emit, nr_cbufs, pfb->cbufs); 708 } 709 710 /* TODO we should not need this or fd_wfi() before emit_constants(): 711 */ 712 OUT_PKT3(ring, CP_EVENT_WRITE, 1); 713 OUT_RING(ring, HLSQ_FLUSH); 714 715 if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ 716 ir3_emit_vs_consts(vp, ring, ctx, emit->info); 717 if (!emit->key.binning_pass) 718 ir3_emit_fs_consts(fp, ring, ctx); 719 } 720 721 if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { 722 struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend); 723 uint32_t i; 724 725 for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) { 726 enum pipe_format format = 727 pipe_surface_format(ctx->batch->framebuffer.cbufs[i]); 728 const struct util_format_description *desc = 729 util_format_description(format); 730 bool is_float = util_format_is_float(format); 731 bool is_int = util_format_is_pure_integer(format); 732 bool has_alpha = util_format_has_alpha(format); 733 uint32_t control = blend->rb_mrt[i].control; 734 uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; 735 736 if (is_int) { 737 control &= (A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK | 738 A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK); 739 control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); 740 } 741 742 if (format == PIPE_FORMAT_NONE) 743 control &= ~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; 744 745 if (has_alpha) { 746 blend_control |= blend->rb_mrt[i].blend_control_rgb; 747 } else { 748 blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; 749 control &= ~A3XX_RB_MRT_CONTROL_BLEND2; 750 } 751 752 if (format && util_format_get_component_bits( 753 format, UTIL_FORMAT_COLORSPACE_RGB, 0) < 8) { 754 const struct pipe_rt_blend_state *rt; 755 if (ctx->blend->independent_blend_enable) 756 rt = &ctx->blend->rt[i]; 757 else 758 rt = &ctx->blend->rt[0]; 759 760 if (!util_format_colormask_full(desc, rt->colormask)) 761 control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE; 762 } 763 764 OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); 765 OUT_RING(ring, control); 766 767 OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1); 768 OUT_RING(ring, blend_control | 769 COND(!is_float, A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE)); 770 } 771 } 772 773 if (dirty & FD_DIRTY_BLEND_COLOR) { 774 struct pipe_blend_color *bcolor = &ctx->blend_color; 775 OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4); 776 OUT_RING(ring, A3XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) | 777 A3XX_RB_BLEND_RED_FLOAT(bcolor->color[0])); 778 OUT_RING(ring, A3XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) | 779 A3XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1])); 780 OUT_RING(ring, A3XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) | 781 A3XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2])); 782 OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) | 783 A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); 784 } 785 786 if (dirty & FD_DIRTY_TEX) 787 fd_wfi(ctx->batch, ring); 788 789 if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) 790 emit_textures(ctx, ring, SB_VERT_TEX, &ctx->tex[PIPE_SHADER_VERTEX]); 791 792 if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) 793 emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]); 794 } 795 796 /* emit setup at begin of new cmdstream buffer (don't rely on previous 797 * state, there could have been a context switch between ioctls): 798 */ 799 void 800 fd3_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) 801 { 802 struct fd_context *ctx = batch->ctx; 803 struct fd3_context *fd3_ctx = fd3_context(ctx); 804 int i; 805 806 if (ctx->screen->gpu_id == 320) { 807 OUT_PKT3(ring, CP_REG_RMW, 3); 808 OUT_RING(ring, REG_A3XX_RBBM_CLOCK_CTL); 809 OUT_RING(ring, 0xfffcffff); 810 OUT_RING(ring, 0x00000000); 811 } 812 813 fd_wfi(batch, ring); 814 OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); 815 OUT_RING(ring, 0x00007fff); 816 817 OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_PARAM_REG, 3); 818 OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */ 819 OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */ 820 OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */ 821 822 OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_PARAM_REG, 3); 823 OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */ 824 OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */ 825 OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */ 826 827 OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); 828 OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */ 829 830 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 831 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 832 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 833 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 834 835 OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 2); 836 OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE | 837 A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) | 838 A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff)); 839 OUT_RING(ring, 0x00000000); /* RB_ALPHA_REF */ 840 841 OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1); 842 OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) | 843 A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0)); 844 845 OUT_PKT0(ring, REG_A3XX_GRAS_TSE_DEBUG_ECO, 1); 846 OUT_RING(ring, 0x00000001); /* GRAS_TSE_DEBUG_ECO */ 847 848 OUT_PKT0(ring, REG_A3XX_TPL1_TP_VS_TEX_OFFSET, 1); 849 OUT_RING(ring, A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(VERT_TEX_OFF) | 850 A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(VERT_TEX_OFF) | 851 A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * VERT_TEX_OFF)); 852 853 OUT_PKT0(ring, REG_A3XX_TPL1_TP_FS_TEX_OFFSET, 1); 854 OUT_RING(ring, A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(FRAG_TEX_OFF) | 855 A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(FRAG_TEX_OFF) | 856 A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * FRAG_TEX_OFF)); 857 858 OUT_PKT0(ring, REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0, 2); 859 OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_0 */ 860 OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_1 */ 861 862 OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E43, 1); 863 OUT_RING(ring, 0x00000001); /* UNKNOWN_0E43 */ 864 865 OUT_PKT0(ring, REG_A3XX_UNKNOWN_0F03, 1); 866 OUT_RING(ring, 0x00000001); /* UNKNOWN_0F03 */ 867 868 OUT_PKT0(ring, REG_A3XX_UNKNOWN_0EE0, 1); 869 OUT_RING(ring, 0x00000003); /* UNKNOWN_0EE0 */ 870 871 OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1); 872 OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */ 873 874 OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1); 875 OUT_RING(ring, 0x00000000); /* HLSQ_PERFCOUNTER0_SELECT */ 876 877 OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2); 878 OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) | 879 A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(0)); 880 OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) | 881 A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0)); 882 883 fd3_emit_cache_flush(batch, ring); 884 885 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 886 OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ 887 888 OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2); 889 OUT_RING(ring, 0xffc00010); /* GRAS_SU_POINT_MINMAX */ 890 OUT_RING(ring, 0x00000008); /* GRAS_SU_POINT_SIZE */ 891 892 OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); 893 OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ 894 895 OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); 896 OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | 897 A3XX_RB_WINDOW_OFFSET_Y(0)); 898 899 OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4); 900 OUT_RING(ring, A3XX_RB_BLEND_RED_UINT(0) | 901 A3XX_RB_BLEND_RED_FLOAT(0.0)); 902 OUT_RING(ring, A3XX_RB_BLEND_GREEN_UINT(0) | 903 A3XX_RB_BLEND_GREEN_FLOAT(0.0)); 904 OUT_RING(ring, A3XX_RB_BLEND_BLUE_UINT(0) | 905 A3XX_RB_BLEND_BLUE_FLOAT(0.0)); 906 OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) | 907 A3XX_RB_BLEND_ALPHA_FLOAT(1.0)); 908 909 for (i = 0; i < 6; i++) { 910 OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(i), 4); 911 OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].X */ 912 OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Y */ 913 OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Z */ 914 OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */ 915 } 916 917 OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); 918 OUT_RING(ring, 0x00000000); 919 920 fd_event_write(batch, ring, CACHE_FLUSH); 921 922 if (is_a3xx_p0(ctx->screen)) { 923 OUT_PKT3(ring, CP_DRAW_INDX, 3); 924 OUT_RING(ring, 0x00000000); 925 OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, 926 INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0)); 927 OUT_RING(ring, 0); /* NumIndices */ 928 } 929 930 OUT_PKT3(ring, CP_NOP, 4); 931 OUT_RING(ring, 0x00000000); 932 OUT_RING(ring, 0x00000000); 933 OUT_RING(ring, 0x00000000); 934 OUT_RING(ring, 0x00000000); 935 936 fd_wfi(batch, ring); 937 938 fd_hw_query_enable(batch, ring); 939 } 940 941 static void 942 fd3_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) 943 { 944 __OUT_IB(ring, true, target); 945 } 946 947 void 948 fd3_emit_init(struct pipe_context *pctx) 949 { 950 struct fd_context *ctx = fd_context(pctx); 951 ctx->emit_const = fd3_emit_const; 952 ctx->emit_const_bo = fd3_emit_const_bo; 953 ctx->emit_ib = fd3_emit_ib; 954 } 955