1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ 2 3 /* 4 * Copyright (C) 2013 Rob Clark <robclark (at) freedesktop.org> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 * 25 * Authors: 26 * Rob Clark <robclark (at) freedesktop.org> 27 */ 28 29 #include "pipe/p_state.h" 30 #include "util/u_string.h" 31 #include "util/u_memory.h" 32 #include "util/u_inlines.h" 33 #include "util/u_format.h" 34 35 #include "freedreno_draw.h" 36 #include "freedreno_state.h" 37 #include "freedreno_resource.h" 38 39 #include "fd3_gmem.h" 40 #include "fd3_context.h" 41 #include "fd3_emit.h" 42 #include "fd3_program.h" 43 #include "fd3_format.h" 44 #include "fd3_zsa.h" 45 46 static void 47 emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, 48 struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w, 49 bool decode_srgb) 50 { 51 enum a3xx_tile_mode tile_mode; 52 unsigned i; 53 54 if (bin_w) { 55 tile_mode = TILE_32X32; 56 } else { 57 tile_mode = LINEAR; 58 } 59 60 for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) { 61 enum pipe_format pformat = 0; 62 enum a3xx_color_fmt format = 0; 63 enum a3xx_color_swap swap = WZYX; 64 bool srgb = false; 65 struct fd_resource *rsc = NULL; 66 struct fd_resource_slice *slice = NULL; 67 uint32_t stride = 0; 68 uint32_t base = 0; 69 uint32_t offset = 0; 70 71 if ((i < nr_bufs) && bufs[i]) { 72 struct pipe_surface *psurf = bufs[i]; 73 74 rsc = fd_resource(psurf->texture); 75 pformat = psurf->format; 76 /* In case we're drawing to Z32F_S8, the "color" actually goes to 77 * the stencil 78 */ 79 if (rsc->stencil) { 80 rsc = rsc->stencil; 81 pformat = rsc->base.b.format; 82 if (bases) 83 bases++; 84 } 85 slice = fd_resource_slice(rsc, psurf->u.tex.level); 86 format = fd3_pipe2color(pformat); 87 swap = fd3_pipe2swap(pformat); 88 if (decode_srgb) 89 srgb = util_format_is_srgb(pformat); 90 else 91 pformat = util_format_linear(pformat); 92 93 debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); 94 95 offset = fd_resource_offset(rsc, psurf->u.tex.level, 96 psurf->u.tex.first_layer); 97 98 if (bin_w) { 99 stride = bin_w * rsc->cpp; 100 101 if (bases) { 102 base = bases[i]; 103 } 104 } else { 105 stride = slice->pitch * rsc->cpp; 106 } 107 } else if (i < nr_bufs && bases) { 108 base = bases[i]; 109 } 110 111 OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2); 112 OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | 113 A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | 114 A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | 115 A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | 116 COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB)); 117 if (bin_w || (i >= nr_bufs) || !bufs[i]) { 118 OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); 119 } else { 120 OUT_RELOCW(ring, rsc->bo, offset, 0, -1); 121 } 122 123 OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1); 124 OUT_RING(ring, COND((i < nr_bufs) && bufs[i], 125 A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT( 126 fd3_fs_output_format(pformat)))); 127 } 128 } 129 130 static bool 131 use_hw_binning(struct fd_batch *batch) 132 { 133 struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; 134 135 /* workaround: combining scissor optimization and hw binning 136 * seems problematic. Seems like we end up with a mismatch 137 * between binning pass and rendering pass, wrt. where the hw 138 * thinks the vertices belong. And the blob driver doesn't 139 * seem to implement anything like scissor optimization, so 140 * not entirely sure what I might be missing. 141 * 142 * But scissor optimization is mainly for window managers, 143 * which don't have many vertices (and therefore doesn't 144 * benefit much from binning pass). 145 * 146 * So for now just disable binning if scissor optimization is 147 * used. 148 */ 149 if (gmem->minx || gmem->miny) 150 return false; 151 152 return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2); 153 } 154 155 /* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */ 156 static void update_vsc_pipe(struct fd_batch *batch); 157 static void 158 emit_binning_workaround(struct fd_batch *batch) 159 { 160 struct fd_context *ctx = batch->ctx; 161 struct fd_gmem_stateobj *gmem = &ctx->gmem; 162 struct fd_ringbuffer *ring = batch->gmem; 163 struct fd3_emit emit = { 164 .debug = &ctx->debug, 165 .vtx = &ctx->solid_vbuf_state, 166 .prog = &ctx->solid_prog, 167 .key = { 168 .half_precision = true, 169 }, 170 }; 171 172 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); 173 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | 174 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 175 A3XX_RB_MODE_CONTROL_MRT(0)); 176 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) | 177 A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 178 A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); 179 180 OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4); 181 OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | 182 A3XX_RB_COPY_CONTROL_MODE(0) | 183 A3XX_RB_COPY_CONTROL_GMEM_BASE(0)); 184 OUT_RELOCW(ring, fd_resource(ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */ 185 OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128)); 186 OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | 187 A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) | 188 A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) | 189 A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | 190 A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE)); 191 192 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 193 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | 194 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 195 A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); 196 197 fd3_program_emit(ring, &emit, 0, NULL); 198 fd3_emit_vertex_bufs(ring, &emit); 199 200 OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4); 201 OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | 202 A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | 203 A3XX_HLSQ_CONTROL_0_REG_RESERVED2 | 204 A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); 205 OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | 206 A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE); 207 OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); 208 OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */ 209 210 OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1); 211 OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) | 212 A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20)); 213 214 OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1); 215 OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE | 216 A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) | 217 A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff)); 218 219 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); 220 OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); 221 222 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); 223 OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | 224 A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | 225 A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | 226 A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | 227 A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | 228 A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | 229 A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | 230 A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); 231 232 OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); 233 OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0)); 234 235 OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); 236 OUT_RING(ring, 0); /* VFD_INDEX_MIN */ 237 OUT_RING(ring, 2); /* VFD_INDEX_MAX */ 238 OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ 239 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ 240 241 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); 242 OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | 243 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | 244 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | 245 A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); 246 247 OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); 248 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | 249 A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1)); 250 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) | 251 A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1)); 252 253 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 254 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | 255 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); 256 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) | 257 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0)); 258 259 fd_wfi(batch, ring); 260 OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); 261 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0)); 262 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0)); 263 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0)); 264 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0)); 265 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); 266 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); 267 268 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 269 OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE | 270 A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE | 271 A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE | 272 A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE | 273 A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE); 274 275 OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1); 276 OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) | 277 A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0)); 278 279 OUT_PKT3(ring, CP_DRAW_INDX_2, 5); 280 OUT_RING(ring, 0x00000000); /* viz query info. */ 281 OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE, 282 INDEX_SIZE_32_BIT, IGNORE_VISIBILITY, 0)); 283 OUT_RING(ring, 2); /* NumIndices */ 284 OUT_RING(ring, 2); 285 OUT_RING(ring, 1); 286 fd_reset_wfi(batch); 287 288 OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1); 289 OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS)); 290 291 OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); 292 OUT_RING(ring, 0x00000000); 293 294 fd_wfi(batch, ring); 295 OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1); 296 OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | 297 A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); 298 299 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 300 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 301 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 302 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 303 304 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 305 OUT_RING(ring, 0x00000000); 306 } 307 308 /* transfer from gmem to system memory (ie. normal RAM) */ 309 310 static void 311 emit_gmem2mem_surf(struct fd_batch *batch, 312 enum adreno_rb_copy_control_mode mode, 313 bool stencil, 314 uint32_t base, struct pipe_surface *psurf) 315 { 316 struct fd_ringbuffer *ring = batch->gmem; 317 struct fd_resource *rsc = fd_resource(psurf->texture); 318 enum pipe_format format = psurf->format; 319 if (stencil) { 320 rsc = rsc->stencil; 321 format = rsc->base.b.format; 322 } 323 struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level); 324 uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level, 325 psurf->u.tex.first_layer); 326 327 debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); 328 329 OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4); 330 OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | 331 A3XX_RB_COPY_CONTROL_MODE(mode) | 332 A3XX_RB_COPY_CONTROL_GMEM_BASE(base) | 333 COND(format == PIPE_FORMAT_Z32_FLOAT || 334 format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, 335 A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE)); 336 337 OUT_RELOCW(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */ 338 OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); 339 OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | 340 A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) | 341 A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | 342 A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | 343 A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format))); 344 345 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, 346 DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); 347 } 348 349 static void 350 fd3_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) 351 { 352 struct fd_context *ctx = batch->ctx; 353 struct fd_ringbuffer *ring = batch->gmem; 354 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 355 struct fd3_emit emit = { 356 .debug = &ctx->debug, 357 .vtx = &ctx->solid_vbuf_state, 358 .prog = &ctx->solid_prog, 359 .key = { 360 .half_precision = true, 361 }, 362 }; 363 int i; 364 365 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); 366 OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); 367 368 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); 369 OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | 370 A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | 371 A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | 372 A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | 373 A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | 374 A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | 375 A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | 376 A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); 377 378 OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2); 379 OUT_RING(ring, 0xff000000 | 380 A3XX_RB_STENCILREFMASK_STENCILREF(0) | 381 A3XX_RB_STENCILREFMASK_STENCILMASK(0) | 382 A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); 383 OUT_RING(ring, 0xff000000 | 384 A3XX_RB_STENCILREFMASK_STENCILREF(0) | 385 A3XX_RB_STENCILREFMASK_STENCILMASK(0) | 386 A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); 387 388 OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); 389 OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0)); 390 391 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 392 OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ 393 394 fd_wfi(batch, ring); 395 OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); 396 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5)); 397 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0)); 398 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5)); 399 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0)); 400 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); 401 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); 402 403 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 404 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | 405 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 406 A3XX_RB_MODE_CONTROL_MRT(0)); 407 408 OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); 409 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 410 A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | 411 A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | 412 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w)); 413 414 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 415 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | 416 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 417 A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); 418 419 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); 420 OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | 421 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | 422 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | 423 A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); 424 425 OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); 426 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | 427 A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0)); 428 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) | 429 A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1)); 430 431 OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); 432 OUT_RING(ring, 0); /* VFD_INDEX_MIN */ 433 OUT_RING(ring, 2); /* VFD_INDEX_MAX */ 434 OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ 435 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ 436 437 fd3_program_emit(ring, &emit, 0, NULL); 438 fd3_emit_vertex_bufs(ring, &emit); 439 440 if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { 441 struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); 442 if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH) 443 emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false, 444 ctx->gmem.zsbuf_base[0], pfb->zsbuf); 445 if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL) 446 emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true, 447 ctx->gmem.zsbuf_base[1], pfb->zsbuf); 448 } 449 450 if (batch->resolve & FD_BUFFER_COLOR) { 451 for (i = 0; i < pfb->nr_cbufs; i++) { 452 if (!pfb->cbufs[i]) 453 continue; 454 if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) 455 continue; 456 emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false, 457 ctx->gmem.cbuf_base[i], pfb->cbufs[i]); 458 } 459 } 460 461 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 462 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 463 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 464 A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); 465 466 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 467 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 468 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 469 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 470 } 471 472 /* transfer from system memory to gmem */ 473 474 static void 475 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t bases[], 476 struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w) 477 { 478 struct fd_ringbuffer *ring = batch->gmem; 479 struct pipe_surface *zsbufs[2]; 480 481 assert(bufs > 0); 482 483 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 484 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 485 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 486 A3XX_RB_MODE_CONTROL_MRT(bufs - 1)); 487 488 emit_mrt(ring, bufs, psurf, bases, bin_w, false); 489 490 if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT || 491 psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { 492 /* Depth is stored as unorm in gmem, so we have to write it in using a 493 * special blit shader which writes depth. 494 */ 495 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); 496 OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z | 497 A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | 498 A3XX_RB_DEPTH_CONTROL_Z_ENABLE | 499 A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE | 500 A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS))); 501 502 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); 503 OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) | 504 A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32)); 505 OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->ctx->gmem.bin_w)); 506 507 if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { 508 OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); 509 OUT_RING(ring, 0); 510 } else { 511 /* The gmem_restore_tex logic will put the first buffer's stencil 512 * as color. Supply it with the proper information to make that 513 * happen. 514 */ 515 zsbufs[0] = zsbufs[1] = psurf[0]; 516 psurf = zsbufs; 517 bufs = 2; 518 } 519 } else { 520 OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); 521 OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); 522 } 523 524 fd3_emit_gmem_restore_tex(ring, psurf, bufs); 525 526 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, 527 DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); 528 } 529 530 static void 531 fd3_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) 532 { 533 struct fd_context *ctx = batch->ctx; 534 struct fd_gmem_stateobj *gmem = &ctx->gmem; 535 struct fd_ringbuffer *ring = batch->gmem; 536 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 537 struct fd3_emit emit = { 538 .debug = &ctx->debug, 539 .vtx = &ctx->blit_vbuf_state, 540 .sprite_coord_enable = 1, 541 /* NOTE: They all use the same VP, this is for vtx bufs. */ 542 .prog = &ctx->blit_prog[0], 543 .key = { 544 .half_precision = fd_half_precision(pfb), 545 }, 546 }; 547 float x0, y0, x1, y1; 548 unsigned bin_w = tile->bin_w; 549 unsigned bin_h = tile->bin_h; 550 unsigned i; 551 552 /* write texture coordinates to vertexbuf: */ 553 x0 = ((float)tile->xoff) / ((float)pfb->width); 554 x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width); 555 y0 = ((float)tile->yoff) / ((float)pfb->height); 556 y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height); 557 558 OUT_PKT3(ring, CP_MEM_WRITE, 5); 559 OUT_RELOCW(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0); 560 OUT_RING(ring, fui(x0)); 561 OUT_RING(ring, fui(y0)); 562 OUT_RING(ring, fui(x1)); 563 OUT_RING(ring, fui(y1)); 564 565 fd3_emit_cache_flush(batch, ring); 566 567 for (i = 0; i < 4; i++) { 568 OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); 569 OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | 570 A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) | 571 A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); 572 573 OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1); 574 OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | 575 A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | 576 A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) | 577 A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) | 578 A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | 579 A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO)); 580 } 581 582 OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); 583 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) | 584 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); 585 586 fd_wfi(batch, ring); 587 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); 588 OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS)); 589 590 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); 591 OUT_RING(ring, 0); 592 OUT_RING(ring, 0); 593 594 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); 595 OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */ 596 597 fd_wfi(batch, ring); 598 OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); 599 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5)); 600 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0)); 601 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5)); 602 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0)); 603 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); 604 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); 605 606 OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); 607 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | 608 A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0)); 609 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) | 610 A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1)); 611 612 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 613 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | 614 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); 615 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) | 616 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1)); 617 618 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); 619 OUT_RING(ring, 0x2 | 620 A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) | 621 A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | 622 A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | 623 A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | 624 A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) | 625 A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | 626 A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | 627 A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); 628 629 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); 630 OUT_RING(ring, 0); /* RB_STENCIL_INFO */ 631 OUT_RING(ring, 0); /* RB_STENCIL_PITCH */ 632 633 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 634 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 635 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 636 A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); 637 638 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); 639 OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) | 640 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | 641 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | 642 A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); 643 644 OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); 645 OUT_RING(ring, 0); /* VFD_INDEX_MIN */ 646 OUT_RING(ring, 2); /* VFD_INDEX_MAX */ 647 OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ 648 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ 649 650 fd3_emit_vertex_bufs(ring, &emit); 651 652 /* for gmem pitch/base calculations, we need to use the non- 653 * truncated tile sizes: 654 */ 655 bin_w = gmem->bin_w; 656 bin_h = gmem->bin_h; 657 658 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { 659 emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; 660 emit.fp = NULL; /* frag shader changed so clear cache */ 661 fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs); 662 emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); 663 } 664 665 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { 666 if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && 667 pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) { 668 /* Non-float can use a regular color write. It's split over 8-bit 669 * components, so half precision is always sufficient. 670 */ 671 emit.prog = &ctx->blit_prog[0]; 672 emit.key.half_precision = true; 673 } else { 674 /* Float depth needs special blit shader that writes depth */ 675 if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) 676 emit.prog = &ctx->blit_z; 677 else 678 emit.prog = &ctx->blit_zs; 679 emit.key.half_precision = false; 680 } 681 emit.fp = NULL; /* frag shader changed so clear cache */ 682 fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); 683 emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); 684 } 685 686 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 687 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 688 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 689 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 690 691 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 692 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 693 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 694 A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); 695 } 696 697 static void 698 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) 699 { 700 unsigned i; 701 for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { 702 struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); 703 *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0); 704 } 705 util_dynarray_resize(&batch->draw_patches, 0); 706 } 707 708 static void 709 patch_rbrc(struct fd_batch *batch, uint32_t val) 710 { 711 unsigned i; 712 for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) { 713 struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i); 714 *patch->cs = patch->val | val; 715 } 716 util_dynarray_resize(&batch->rbrc_patches, 0); 717 } 718 719 /* for rendering directly to system memory: */ 720 static void 721 fd3_emit_sysmem_prep(struct fd_batch *batch) 722 { 723 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 724 struct fd_ringbuffer *ring = batch->gmem; 725 uint32_t i, pitch = 0; 726 727 for (i = 0; i < pfb->nr_cbufs; i++) { 728 struct pipe_surface *psurf = pfb->cbufs[i]; 729 if (!psurf) 730 continue; 731 pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch; 732 } 733 734 fd3_emit_restore(batch, ring); 735 736 OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); 737 OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | 738 A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); 739 740 emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true); 741 742 /* setup scissor/offset for current tile: */ 743 OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); 744 OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | 745 A3XX_RB_WINDOW_OFFSET_Y(0)); 746 747 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 748 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | 749 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); 750 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) | 751 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1)); 752 753 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 754 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 755 A3XX_RB_MODE_CONTROL_GMEM_BYPASS | 756 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 757 A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); 758 759 patch_draws(batch, IGNORE_VISIBILITY); 760 patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); 761 } 762 763 static void 764 update_vsc_pipe(struct fd_batch *batch) 765 { 766 struct fd_context *ctx = batch->ctx; 767 struct fd3_context *fd3_ctx = fd3_context(ctx); 768 struct fd_ringbuffer *ring = batch->gmem; 769 int i; 770 771 OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); 772 OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ 773 774 for (i = 0; i < 8; i++) { 775 struct fd_vsc_pipe *pipe = &ctx->pipe[i]; 776 777 if (!pipe->bo) { 778 pipe->bo = fd_bo_new(ctx->dev, 0x40000, 779 DRM_FREEDRENO_GEM_TYPE_KMEM); 780 } 781 782 OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3); 783 OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) | 784 A3XX_VSC_PIPE_CONFIG_Y(pipe->y) | 785 A3XX_VSC_PIPE_CONFIG_W(pipe->w) | 786 A3XX_VSC_PIPE_CONFIG_H(pipe->h)); 787 OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[i].DATA_ADDRESS */ 788 OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE[i].DATA_LENGTH */ 789 } 790 } 791 792 static void 793 emit_binning_pass(struct fd_batch *batch) 794 { 795 struct fd_context *ctx = batch->ctx; 796 struct fd_gmem_stateobj *gmem = &ctx->gmem; 797 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 798 struct fd_ringbuffer *ring = batch->gmem; 799 int i; 800 801 uint32_t x1 = gmem->minx; 802 uint32_t y1 = gmem->miny; 803 uint32_t x2 = gmem->minx + gmem->width - 1; 804 uint32_t y2 = gmem->miny + gmem->height - 1; 805 806 if (ctx->screen->gpu_id == 320) { 807 emit_binning_workaround(batch); 808 fd_wfi(batch, ring); 809 OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); 810 OUT_RING(ring, 0x00007fff); 811 } 812 813 OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1); 814 OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE); 815 816 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 817 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) | 818 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 819 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 820 821 OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); 822 OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | 823 A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); 824 825 OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); 826 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | 827 A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 828 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); 829 830 /* setup scissor/offset for whole screen: */ 831 OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); 832 OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(x1) | 833 A3XX_RB_WINDOW_OFFSET_Y(y1)); 834 835 OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1); 836 OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE); 837 838 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 839 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) | 840 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1)); 841 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) | 842 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2)); 843 844 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 845 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) | 846 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 847 A3XX_RB_MODE_CONTROL_MRT(0)); 848 849 for (i = 0; i < 4; i++) { 850 OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); 851 OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) | 852 A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) | 853 A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0)); 854 } 855 856 OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); 857 OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) | 858 A3XX_PC_VSTREAM_CONTROL_N(0)); 859 860 /* emit IB to binning drawcmds: */ 861 ctx->emit_ib(ring, batch->binning); 862 fd_reset_wfi(batch); 863 864 fd_wfi(batch, ring); 865 866 /* and then put stuff back the way it was: */ 867 868 OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1); 869 OUT_RING(ring, 0x00000000); 870 871 OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); 872 OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE | 873 A3XX_SP_SP_CTRL_REG_CONSTMODE(1) | 874 A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | 875 A3XX_SP_SP_CTRL_REG_L0MODE(0)); 876 877 OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1); 878 OUT_RING(ring, 0x00000000); 879 880 OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); 881 OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 882 A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | 883 A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); 884 885 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); 886 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 887 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 888 A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1)); 889 OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | 890 A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | 891 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); 892 893 fd_event_write(batch, ring, CACHE_FLUSH); 894 fd_wfi(batch, ring); 895 896 if (ctx->screen->gpu_id == 320) { 897 /* dummy-draw workaround: */ 898 OUT_PKT3(ring, CP_DRAW_INDX, 3); 899 OUT_RING(ring, 0x00000000); 900 OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, 901 INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0)); 902 OUT_RING(ring, 0); /* NumIndices */ 903 fd_reset_wfi(batch); 904 } 905 906 OUT_PKT3(ring, CP_NOP, 4); 907 OUT_RING(ring, 0x00000000); 908 OUT_RING(ring, 0x00000000); 909 OUT_RING(ring, 0x00000000); 910 OUT_RING(ring, 0x00000000); 911 912 fd_wfi(batch, ring); 913 914 if (ctx->screen->gpu_id == 320) { 915 emit_binning_workaround(batch); 916 } 917 } 918 919 /* before first tile */ 920 static void 921 fd3_emit_tile_init(struct fd_batch *batch) 922 { 923 struct fd_ringbuffer *ring = batch->gmem; 924 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 925 struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; 926 uint32_t rb_render_control; 927 928 fd3_emit_restore(batch, ring); 929 930 /* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated 931 * at the right and bottom edge tiles 932 */ 933 OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1); 934 OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | 935 A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); 936 937 update_vsc_pipe(batch); 938 939 fd_wfi(batch, ring); 940 OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); 941 OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | 942 A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); 943 944 if (use_hw_binning(batch)) { 945 /* emit hw binning pass: */ 946 emit_binning_pass(batch); 947 948 patch_draws(batch, USE_VISIBILITY); 949 } else { 950 patch_draws(batch, IGNORE_VISIBILITY); 951 } 952 953 rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | 954 A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w); 955 956 patch_rbrc(batch, rb_render_control); 957 } 958 959 /* before mem2gmem */ 960 static void 961 fd3_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) 962 { 963 struct fd_ringbuffer *ring = batch->gmem; 964 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 965 966 OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); 967 OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | 968 A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | 969 A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); 970 } 971 972 /* before IB to rendering cmds: */ 973 static void 974 fd3_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) 975 { 976 struct fd_context *ctx = batch->ctx; 977 struct fd3_context *fd3_ctx = fd3_context(ctx); 978 struct fd_ringbuffer *ring = batch->gmem; 979 struct fd_gmem_stateobj *gmem = &ctx->gmem; 980 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 981 982 uint32_t x1 = tile->xoff; 983 uint32_t y1 = tile->yoff; 984 uint32_t x2 = tile->xoff + tile->bin_w - 1; 985 uint32_t y2 = tile->yoff + tile->bin_h - 1; 986 987 uint32_t reg; 988 989 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); 990 reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); 991 if (pfb->zsbuf) { 992 reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); 993 } 994 OUT_RING(ring, reg); 995 if (pfb->zsbuf) { 996 struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); 997 OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w)); 998 if (rsc->stencil) { 999 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); 1000 OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1])); 1001 OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w)); 1002 } 1003 } else { 1004 OUT_RING(ring, 0x00000000); 1005 } 1006 1007 if (use_hw_binning(batch)) { 1008 struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p]; 1009 1010 assert(pipe->w * pipe->h); 1011 1012 fd_event_write(batch, ring, HLSQ_FLUSH); 1013 fd_wfi(batch, ring); 1014 1015 OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); 1016 OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) | 1017 A3XX_PC_VSTREAM_CONTROL_N(tile->n)); 1018 1019 1020 OUT_PKT3(ring, CP_SET_BIN_DATA, 2); 1021 OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */ 1022 OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */ 1023 (tile->p * 4), 0, 0); 1024 } else { 1025 OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); 1026 OUT_RING(ring, 0x00000000); 1027 } 1028 1029 OUT_PKT3(ring, CP_SET_BIN, 3); 1030 OUT_RING(ring, 0x00000000); 1031 OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); 1032 OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2)); 1033 1034 emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true); 1035 1036 /* setup scissor/offset for current tile: */ 1037 OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); 1038 OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) | 1039 A3XX_RB_WINDOW_OFFSET_Y(tile->yoff)); 1040 1041 OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); 1042 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) | 1043 A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1)); 1044 OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) | 1045 A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2)); 1046 } 1047 1048 void 1049 fd3_gmem_init(struct pipe_context *pctx) 1050 { 1051 struct fd_context *ctx = fd_context(pctx); 1052 1053 ctx->emit_sysmem_prep = fd3_emit_sysmem_prep; 1054 ctx->emit_tile_init = fd3_emit_tile_init; 1055 ctx->emit_tile_prep = fd3_emit_tile_prep; 1056 ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem; 1057 ctx->emit_tile_renderprep = fd3_emit_tile_renderprep; 1058 ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem; 1059 } 1060