/* Mesa radeonsi driver: shader hardware-state setup and draw emission. */
      1 /*
      2  * Copyright 2012 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
 *      Christian König <christian.koenig (at) amd.com>
     25  */
     26 
     27 #include "util/u_memory.h"
     28 #include "util/u_framebuffer.h"
     29 #include "util/u_blitter.h"
     30 #include "tgsi/tgsi_parse.h"
     31 #include "radeonsi_pipe.h"
     32 #include "radeonsi_shader.h"
     33 #include "si_state.h"
     34 #include "sid.h"
     35 
     36 /*
     37  * Shaders
     38  */
     39 
     40 static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader)
     41 {
     42 	struct r600_context *rctx = (struct r600_context *)ctx;
     43 	struct si_pm4_state *pm4;
     44 	unsigned num_sgprs, num_user_sgprs;
     45 	unsigned nparams, i;
     46 	uint64_t va;
     47 
     48 	if (si_pipe_shader_create(ctx, shader))
     49 		return;
     50 
     51 	si_pm4_delete_state(rctx, vs, shader->pm4);
     52 	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
     53 
     54 	si_pm4_inval_shader_cache(pm4);
     55 
     56 	/* Certain attributes (position, psize, etc.) don't count as params.
     57 	 * VS is required to export at least one param and r600_shader_from_tgsi()
     58 	 * takes care of adding a dummy export.
     59 	 */
     60 	for (nparams = 0, i = 0 ; i < shader->shader.noutput; i++) {
     61 		if (shader->shader.output[i].name != TGSI_SEMANTIC_POSITION)
     62 			nparams++;
     63 	}
     64 	if (nparams < 1)
     65 		nparams = 1;
     66 
     67 	si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
     68 		       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
     69 
     70 	si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
     71 		       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
     72 		       S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
     73 		       S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
     74 		       S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE));
     75 
     76 	va = r600_resource_va(ctx->screen, (void *)shader->bo);
     77 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
     78 	si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
     79 	si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
     80 
     81 	num_user_sgprs = 8;
     82 	num_sgprs = shader->num_sgprs;
     83 	if (num_user_sgprs > num_sgprs)
     84 		num_sgprs = num_user_sgprs;
     85 	/* Last 2 reserved SGPRs are used for VCC */
     86 	num_sgprs += 2;
     87 	assert(num_sgprs <= 104);
     88 
     89 	si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
     90 		       S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
     91 		       S_00B128_SGPRS((num_sgprs - 1) / 8));
     92 	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
     93 		       S_00B12C_USER_SGPR(num_user_sgprs));
     94 
     95 	si_pm4_bind_state(rctx, vs, shader->pm4);
     96 }
     97 
     98 static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *shader)
     99 {
    100 	struct r600_context *rctx = (struct r600_context *)ctx;
    101 	struct si_pm4_state *pm4;
    102 	unsigned i, exports_ps, num_cout, spi_ps_in_control, db_shader_control;
    103 	unsigned num_sgprs, num_user_sgprs;
    104 	int ninterp = 0;
    105 	boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
    106 	unsigned spi_baryc_cntl, spi_ps_input_ena;
    107 	uint64_t va;
    108 
    109 	if (si_pipe_shader_create(ctx, shader))
    110 		return;
    111 
    112 	si_pm4_delete_state(rctx, ps, shader->pm4);
    113 	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
    114 
    115 	si_pm4_inval_shader_cache(pm4);
    116 
    117 	db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
    118 	for (i = 0; i < shader->shader.ninput; i++) {
    119 		ninterp++;
    120 		/* XXX: Flat shading hangs the GPU */
    121 		if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
    122 		    (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
    123 		     rctx->queued.named.rasterizer->flatshade))
    124 			have_linear = TRUE;
    125 		if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
    126 			have_linear = TRUE;
    127 		if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
    128 			have_perspective = TRUE;
    129 		if (shader->shader.input[i].centroid)
    130 			have_centroid = TRUE;
    131 	}
    132 
    133 	for (i = 0; i < shader->shader.noutput; i++) {
    134 		if (shader->shader.output[i].name == TGSI_SEMANTIC_POSITION)
    135 			db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
    136 		if (shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL)
    137 			db_shader_control |= 0; // XXX OP_VAL or TEST_VAL?
    138 	}
    139 	if (shader->shader.uses_kill)
    140 		db_shader_control |= S_02880C_KILL_ENABLE(1);
    141 
    142 	exports_ps = 0;
    143 	num_cout = 0;
    144 	for (i = 0; i < shader->shader.noutput; i++) {
    145 		if (shader->shader.output[i].name == TGSI_SEMANTIC_POSITION ||
    146 		    shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL)
    147 			exports_ps |= 1;
    148 		else if (shader->shader.output[i].name == TGSI_SEMANTIC_COLOR) {
    149 			if (shader->shader.fs_write_all)
    150 				num_cout = shader->shader.nr_cbufs;
    151 			else
    152 				num_cout++;
    153 		}
    154 	}
    155 	if (!exports_ps) {
    156 		/* always at least export 1 component per pixel */
    157 		exports_ps = 2;
    158 	}
    159 
    160 	spi_ps_in_control = S_0286D8_NUM_INTERP(ninterp);
    161 
    162 	spi_baryc_cntl = 0;
    163 	if (have_perspective)
    164 		spi_baryc_cntl |= have_centroid ?
    165 			S_0286E0_PERSP_CENTROID_CNTL(1) : S_0286E0_PERSP_CENTER_CNTL(1);
    166 	if (have_linear)
    167 		spi_baryc_cntl |= have_centroid ?
    168 			S_0286E0_LINEAR_CENTROID_CNTL(1) : S_0286E0_LINEAR_CENTER_CNTL(1);
    169 
    170 	si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
    171 	spi_ps_input_ena = shader->spi_ps_input_ena;
    172 	/* we need to enable at least one of them, otherwise we hang the GPU */
    173 	if (!G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) &&
    174 	    !G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) &&
    175 	    !G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) &&
    176 	    !G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) &&
    177 	    !G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) &&
    178 	    !G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) &&
    179 	    !G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena)) {
    180 
    181 		spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
    182 	}
    183 	si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena);
    184 	si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena);
    185 	si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
    186 
    187 	/* XXX: Depends on Z buffer format? */
    188 	si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, 0);
    189 
    190 	va = r600_resource_va(ctx->screen, (void *)shader->bo);
    191 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
    192 	si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
    193 	si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
    194 
    195 	num_user_sgprs = 6;
    196 	num_sgprs = shader->num_sgprs;
    197 	if (num_user_sgprs > num_sgprs)
    198 		num_sgprs = num_user_sgprs;
    199 	/* Last 2 reserved SGPRs are used for VCC */
    200 	num_sgprs += 2;
    201 	assert(num_sgprs <= 104);
    202 
    203 	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
    204 		       S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
    205 		       S_00B028_SGPRS((num_sgprs - 1) / 8));
    206 	si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
    207 		       S_00B02C_USER_SGPR(num_user_sgprs));
    208 
    209 	si_pm4_set_reg(pm4, R_02880C_DB_SHADER_CONTROL, db_shader_control);
    210 
    211 	shader->sprite_coord_enable = rctx->sprite_coord_enable;
    212 	si_pm4_bind_state(rctx, ps, shader->pm4);
    213 }
    214 
    215 /*
    216  * Drawing
    217  */
    218 
    219 static unsigned si_conv_pipe_prim(unsigned pprim)
    220 {
    221         static const unsigned prim_conv[] = {
    222 		[PIPE_PRIM_POINTS]			= V_008958_DI_PT_POINTLIST,
    223 		[PIPE_PRIM_LINES]			= V_008958_DI_PT_LINELIST,
    224 		[PIPE_PRIM_LINE_LOOP]			= V_008958_DI_PT_LINELOOP,
    225 		[PIPE_PRIM_LINE_STRIP]			= V_008958_DI_PT_LINESTRIP,
    226 		[PIPE_PRIM_TRIANGLES]			= V_008958_DI_PT_TRILIST,
    227 		[PIPE_PRIM_TRIANGLE_STRIP]		= V_008958_DI_PT_TRISTRIP,
    228 		[PIPE_PRIM_TRIANGLE_FAN]		= V_008958_DI_PT_TRIFAN,
    229 		[PIPE_PRIM_QUADS]			= V_008958_DI_PT_QUADLIST,
    230 		[PIPE_PRIM_QUAD_STRIP]			= V_008958_DI_PT_QUADSTRIP,
    231 		[PIPE_PRIM_POLYGON]			= V_008958_DI_PT_POLYGON,
    232 		[PIPE_PRIM_LINES_ADJACENCY]		= ~0,
    233 		[PIPE_PRIM_LINE_STRIP_ADJACENCY]	= ~0,
    234 		[PIPE_PRIM_TRIANGLES_ADJACENCY]		= ~0,
    235 		[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]	= ~0
    236         };
    237 	unsigned result = prim_conv[pprim];
    238         if (result == ~0) {
    239 		R600_ERR("unsupported primitive type %d\n", pprim);
    240         }
    241 	return result;
    242 }
    243 
    244 static bool si_update_draw_info_state(struct r600_context *rctx,
    245 			       const struct pipe_draw_info *info)
    246 {
    247 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
    248 	unsigned prim = si_conv_pipe_prim(info->mode);
    249 	unsigned ls_mask = 0;
    250 
    251 	if (pm4 == NULL)
    252 		return false;
    253 
    254 	if (prim == ~0) {
    255 		FREE(pm4);
    256 		return false;
    257 	}
    258 
    259 	si_pm4_set_reg(pm4, R_008958_VGT_PRIMITIVE_TYPE, prim);
    260 	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
    261 	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
    262 	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET,
    263 		       info->indexed ? info->index_bias : info->start);
    264 	si_pm4_set_reg(pm4, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, info->restart_index);
    265 	si_pm4_set_reg(pm4, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
    266 #if 0
    267 	si_pm4_set_reg(pm4, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
    268 	si_pm4_set_reg(pm4, R_03CFF4_SQ_VTX_START_INST_LOC, info->start_instance);
    269 #endif
    270 
    271         if (prim == V_008958_DI_PT_LINELIST)
    272                 ls_mask = 1;
    273         else if (prim == V_008958_DI_PT_LINESTRIP)
    274                 ls_mask = 2;
    275 	si_pm4_set_reg(pm4, R_028A0C_PA_SC_LINE_STIPPLE,
    276 		       S_028A0C_AUTO_RESET_CNTL(ls_mask) |
    277 		       rctx->pa_sc_line_stipple);
    278 
    279         if (info->mode == PIPE_PRIM_QUADS || info->mode == PIPE_PRIM_QUAD_STRIP || info->mode == PIPE_PRIM_POLYGON) {
    280 		si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
    281 			       S_028814_PROVOKING_VTX_LAST(1) | rctx->pa_su_sc_mode_cntl);
    282         } else {
    283 		si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, rctx->pa_su_sc_mode_cntl);
    284         }
    285 	si_pm4_set_reg(pm4, R_02881C_PA_CL_VS_OUT_CNTL,
    286 		       prim == PIPE_PRIM_POINTS ? rctx->pa_cl_vs_out_cntl : 0
    287 		       /*| (rctx->rasterizer->clip_plane_enable &
    288 		       rctx->vs_shader->shader.clip_dist_write)*/);
    289 	si_pm4_set_reg(pm4, R_028810_PA_CL_CLIP_CNTL, rctx->pa_cl_clip_cntl
    290 			/*| (rctx->vs_shader->shader.clip_dist_write ||
    291 			rctx->vs_shader->shader.vs_prohibit_ucps ?
    292 			0 : rctx->rasterizer->clip_plane_enable & 0x3F)*/);
    293 
    294 	si_pm4_set_state(rctx, draw_info, pm4);
    295 	return true;
    296 }
    297 
/* Placeholder: alpha-reference register update. The entire body is
 * compiled out (#if 0) pending a port to the si_pm4 state mechanism
 * (note the unresolved `TODO` state name and r600_pipe_state usage),
 * so this function currently does nothing.
 */
static void si_update_alpha_ref(struct r600_context *rctx)
{
#if 0
        unsigned alpha_ref;
        struct r600_pipe_state rstate;

        alpha_ref = rctx->alpha_ref;
        rstate.nregs = 0;
        if (rctx->export_16bpc)
                alpha_ref &= ~0x1FFF;
        si_pm4_set_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref);

	si_pm4_set_state(rctx, TODO, pm4);
        rctx->alpha_ref_dirty = false;
#endif
}
    314 
    315 static void si_update_spi_map(struct r600_context *rctx)
    316 {
    317 	struct si_shader *ps = &rctx->ps_shader->current->shader;
    318 	struct si_shader *vs = &rctx->vs_shader->current->shader;
    319 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
    320 	unsigned i, j, tmp;
    321 
    322 	for (i = 0; i < ps->ninput; i++) {
    323 		tmp = 0;
    324 
    325 #if 0
    326 		/* XXX: Flat shading hangs the GPU */
    327 		if (ps->input[i].name == TGSI_SEMANTIC_POSITION ||
    328 		    ps->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
    329 		    (ps->input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
    330 		     rctx->rasterizer && rctx->rasterizer->flatshade)) {
    331 			tmp |= S_028644_FLAT_SHADE(1);
    332 		}
    333 #endif
    334 
    335 		if (ps->input[i].name == TGSI_SEMANTIC_GENERIC &&
    336 		    rctx->sprite_coord_enable & (1 << ps->input[i].sid)) {
    337 			tmp |= S_028644_PT_SPRITE_TEX(1);
    338 		}
    339 
    340 		for (j = 0; j < vs->noutput; j++) {
    341 			if (ps->input[i].name == vs->output[j].name &&
    342 			    ps->input[i].sid == vs->output[j].sid) {
    343 				tmp |= S_028644_OFFSET(vs->output[j].param_offset);
    344 				break;
    345 			}
    346 		}
    347 
    348 		if (j == vs->noutput) {
    349 			/* No corresponding output found, load defaults into input */
    350 			tmp |= S_028644_OFFSET(0x20);
    351 		}
    352 
    353 		si_pm4_set_reg(pm4, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp);
    354 	}
    355 
    356 	si_pm4_set_state(rctx, spi, pm4);
    357 }
    358 
    359 static void si_update_derived_state(struct r600_context *rctx)
    360 {
    361 	struct pipe_context * ctx = (struct pipe_context*)rctx;
    362 	unsigned ps_dirty = 0;
    363 
    364 	if (!rctx->blitter->running) {
    365 		if (rctx->have_depth_fb || rctx->have_depth_texture)
    366 			si_flush_depth_textures(rctx);
    367 	}
    368 
    369 	si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
    370 
    371 	if (rctx->alpha_ref_dirty) {
    372 		si_update_alpha_ref(rctx);
    373 	}
    374 
    375 	if (!rctx->vs_shader->current->pm4) {
    376 		si_pipe_shader_vs(ctx, rctx->vs_shader->current);
    377 	}
    378 
    379 	if (!rctx->ps_shader->current->pm4) {
    380 		si_pipe_shader_ps(ctx, rctx->ps_shader->current);
    381 		ps_dirty = 0;
    382 	}
    383 	if (!rctx->ps_shader->current->bo) {
    384 		if (!rctx->dummy_pixel_shader->pm4)
    385 			si_pipe_shader_ps(ctx, rctx->dummy_pixel_shader);
    386 		else
    387 			si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
    388 
    389 		ps_dirty = 0;
    390 	}
    391 
    392 	if (ps_dirty) {
    393 		si_pm4_bind_state(rctx, ps, rctx->ps_shader->current->pm4);
    394 		rctx->shader_dirty = true;
    395 	}
    396 
    397 	if (rctx->shader_dirty) {
    398 		si_update_spi_map(rctx);
    399 		rctx->shader_dirty = false;
    400 	}
    401 }
    402 
    403 static void si_vertex_buffer_update(struct r600_context *rctx)
    404 {
    405 	struct pipe_context *ctx = &rctx->context;
    406 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
    407 	bool bound[PIPE_MAX_ATTRIBS] = {};
    408 	unsigned i, count;
    409 	uint64_t va;
    410 
    411 	si_pm4_inval_vertex_cache(pm4);
    412 
    413 	/* bind vertex buffer once */
    414 	count = rctx->vertex_elements->count;
    415 	assert(count <= 256 / 4);
    416 
    417 	si_pm4_sh_data_begin(pm4);
    418 	for (i = 0 ; i < count; i++) {
    419 		struct pipe_vertex_element *ve = &rctx->vertex_elements->elements[i];
    420 		struct pipe_vertex_buffer *vb;
    421 		struct si_resource *rbuffer;
    422 		unsigned offset;
    423 
    424 		if (ve->vertex_buffer_index >= rctx->nr_vertex_buffers)
    425 			continue;
    426 
    427 		vb = &rctx->vertex_buffer[ve->vertex_buffer_index];
    428 		rbuffer = (struct si_resource*)vb->buffer;
    429 		if (rbuffer == NULL)
    430 			continue;
    431 
    432 		offset = 0;
    433 		offset += vb->buffer_offset;
    434 		offset += ve->src_offset;
    435 
    436 		va = r600_resource_va(ctx->screen, (void*)rbuffer);
    437 		va += offset;
    438 
    439 		/* Fill in T# buffer resource description */
    440 		si_pm4_sh_data_add(pm4, va & 0xFFFFFFFF);
    441 		si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
    442 					 S_008F04_STRIDE(vb->stride)));
    443 		si_pm4_sh_data_add(pm4, (vb->buffer->width0 - offset) /
    444 					 MAX2(vb->stride, 1));
    445 		si_pm4_sh_data_add(pm4, rctx->vertex_elements->rsrc_word3[i]);
    446 
    447 		if (!bound[ve->vertex_buffer_index]) {
    448 			si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ);
    449 			bound[ve->vertex_buffer_index] = true;
    450 		}
    451 	}
    452 	si_pm4_sh_data_end(pm4, R_00B148_SPI_SHADER_USER_DATA_VS_6);
    453 	si_pm4_set_state(rctx, vertex_buffers, pm4);
    454 }
    455 
    456 static void si_state_draw(struct r600_context *rctx,
    457 			  const struct pipe_draw_info *info,
    458 			  const struct pipe_index_buffer *ib)
    459 {
    460 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
    461 
    462 	/* queries need some special values
    463 	 * (this is non-zero if any query is active) */
    464 	if (rctx->num_cs_dw_queries_suspend) {
    465 		struct si_state_dsa *dsa = rctx->queued.named.dsa;
    466 
    467 		si_pm4_set_reg(pm4, R_028004_DB_COUNT_CONTROL,
    468 			       S_028004_PERFECT_ZPASS_COUNTS(1));
    469 		si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
    470 			       dsa->db_render_override |
    471 			       S_02800C_NOOP_CULL_DISABLE(1));
    472 	}
    473 
    474 	/* draw packet */
    475 	si_pm4_cmd_begin(pm4, PKT3_INDEX_TYPE);
    476 	if (ib->index_size == 4) {
    477 		si_pm4_cmd_add(pm4, V_028A7C_VGT_INDEX_32 | (R600_BIG_ENDIAN ?
    478 				V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
    479 	} else {
    480 		si_pm4_cmd_add(pm4, V_028A7C_VGT_INDEX_16 | (R600_BIG_ENDIAN ?
    481 				V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
    482 	}
    483 	si_pm4_cmd_end(pm4, rctx->predicate_drawing);
    484 
    485 	si_pm4_cmd_begin(pm4, PKT3_NUM_INSTANCES);
    486 	si_pm4_cmd_add(pm4, info->instance_count);
    487 	si_pm4_cmd_end(pm4, rctx->predicate_drawing);
    488 
    489 	if (info->indexed) {
    490 		uint64_t va;
    491 		va = r600_resource_va(&rctx->screen->screen, ib->buffer);
    492 		va += ib->offset;
    493 
    494 		si_pm4_add_bo(pm4, (struct si_resource *)ib->buffer, RADEON_USAGE_READ);
    495 		si_pm4_cmd_begin(pm4, PKT3_DRAW_INDEX_2);
    496 		si_pm4_cmd_add(pm4, (ib->buffer->width0 - ib->offset) /
    497 					rctx->index_buffer.index_size);
    498 		si_pm4_cmd_add(pm4, va);
    499 		si_pm4_cmd_add(pm4, (va >> 32UL) & 0xFF);
    500 		si_pm4_cmd_add(pm4, info->count);
    501 		si_pm4_cmd_add(pm4, V_0287F0_DI_SRC_SEL_DMA);
    502 		si_pm4_cmd_end(pm4, rctx->predicate_drawing);
    503 	} else {
    504 		si_pm4_cmd_begin(pm4, PKT3_DRAW_INDEX_AUTO);
    505 		si_pm4_cmd_add(pm4, info->count);
    506 		si_pm4_cmd_add(pm4, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
    507 			       (info->count_from_stream_output ?
    508 				S_0287F0_USE_OPAQUE(1) : 0));
    509 		si_pm4_cmd_end(pm4, rctx->predicate_drawing);
    510 	}
    511 	si_pm4_set_state(rctx, draw, pm4);
    512 }
    513 
    514 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
    515 {
    516 	struct r600_context *rctx = (struct r600_context *)ctx;
    517 	struct pipe_index_buffer ib = {};
    518 	uint32_t cp_coher_cntl;
    519 
    520 	if ((!info->count && (info->indexed || !info->count_from_stream_output)) ||
    521 	    (info->indexed && !rctx->index_buffer.buffer)) {
    522 		return;
    523 	}
    524 
    525 	if (!rctx->ps_shader || !rctx->vs_shader)
    526 		return;
    527 
    528 	si_update_derived_state(rctx);
    529 	si_vertex_buffer_update(rctx);
    530 
    531 	if (info->indexed) {
    532 		/* Initialize the index buffer struct. */
    533 		pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
    534 		ib.index_size = rctx->index_buffer.index_size;
    535 		ib.offset = rctx->index_buffer.offset + info->start * ib.index_size;
    536 
    537 		/* Translate or upload, if needed. */
    538 		r600_translate_index_buffer(rctx, &ib, info->count);
    539 
    540 		if (ib.user_buffer) {
    541 			r600_upload_index_buffer(rctx, &ib, info->count);
    542 		}
    543 
    544 	} else if (info->count_from_stream_output) {
    545 		r600_context_draw_opaque_count(rctx, (struct r600_so_target*)info->count_from_stream_output);
    546 	}
    547 
    548 	rctx->vs_shader_so_strides = rctx->vs_shader->current->so_strides;
    549 
    550 	if (!si_update_draw_info_state(rctx, info))
    551 		return;
    552 
    553 	si_state_draw(rctx, info, &ib);
    554 
    555 	cp_coher_cntl = si_pm4_sync_flags(rctx);
    556 	if (cp_coher_cntl) {
    557 		struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
    558 		si_cmd_surface_sync(pm4, cp_coher_cntl);
    559 		si_pm4_set_state(rctx, sync, pm4);
    560 	}
    561 
    562 	/* Emit states. */
    563 	rctx->pm4_dirty_cdwords += si_pm4_dirty_dw(rctx);
    564 
    565 	si_need_cs_space(rctx, 0, TRUE);
    566 
    567 	si_pm4_emit_dirty(rctx);
    568 	rctx->pm4_dirty_cdwords = 0;
    569 
    570 #if 0
    571 	/* Enable stream out if needed. */
    572 	if (rctx->streamout_start) {
    573 		r600_context_streamout_begin(rctx);
    574 		rctx->streamout_start = FALSE;
    575 	}
    576 #endif
    577 
    578 
    579 	rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;
    580 
    581 	if (rctx->framebuffer.zsbuf)
    582 	{
    583 		struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture;
    584 		((struct r600_resource_texture *)tex)->dirty_db = TRUE;
    585 	}
    586 
    587 	pipe_resource_reference(&ib.buffer, NULL);
    588 }
    589