/**************************************************************************
 *
 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include <sys/errno.h>

#include "main/glheader.h"
#include "main/context.h"
#include "main/condrender.h"
#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
#include "main/macros.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "drivers/common/meta.h"

#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"

#define FILE_DEBUG_FLAG DEBUG_PRIMS

static const GLuint prim_to_hw_prim[GL_POLYGON+1] = {
   _3DPRIM_POINTLIST,
   _3DPRIM_LINELIST,
   _3DPRIM_LINELOOP,
   _3DPRIM_LINESTRIP,
   _3DPRIM_TRILIST,
   _3DPRIM_TRISTRIP,
   _3DPRIM_TRIFAN,
   _3DPRIM_QUADLIST,
   _3DPRIM_QUADSTRIP,
   _3DPRIM_POLYGON
};


static const GLenum reduced_prim[GL_POLYGON+1] = {
   GL_POINTS,
   GL_LINES,
   GL_LINES,
   GL_LINES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES
};
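
/* Both tables above are indexed directly by the GL primitive mode:
 * GL_POINTS through GL_POLYGON are the consecutive enum values 0 through 9,
 * so the table order must match the GL enum order exactly.
 */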

/* When the primitive changes, set a state bit and re-validate.  This is
 * not the nicest solution; we would rather have every program be immune
 * to the active primitive (i.e. cope with all possibilities), but that
 * may not be realistic.
 */
static void brw_set_prim(struct brw_context *brw,
                         const struct _mesa_prim *prim)
{
   struct gl_context *ctx = &brw->intel.ctx;
   uint32_t hw_prim = prim_to_hw_prim[prim->mode];

   DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));

   /* Slight optimization to avoid the GS program when not needed:
    */
   if (prim->mode == GL_QUAD_STRIP &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL)
      hw_prim = _3DPRIM_TRISTRIP;

   if (prim->mode == GL_QUADS && prim->count == 4 &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL) {
      hw_prim = _3DPRIM_TRIFAN;
   }

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;

      if (reduced_prim[prim->mode] != brw->intel.reduced_primitive) {
         brw->intel.reduced_primitive = reduced_prim[prim->mode];
         brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
      }
   }
}

static void gen6_set_prim(struct brw_context *brw,
                          const struct _mesa_prim *prim)
{
   uint32_t hw_prim;

   DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));

   hw_prim = prim_to_hw_prim[prim->mode];

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
   }
}


/* Trim the vertex count so that only complete quads or quad-strip segments
 * are drawn; other primitive types pass through unchanged.
 */
static GLuint trim(GLenum prim, GLuint length)
{
   if (prim == GL_QUAD_STRIP)
      return length > 3 ? (length - length % 2) : 0;
   else if (prim == GL_QUADS)
      return length - length % 4;
   else
      return length;
}
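
/* Worked examples of trim() above (illustrative values only):
 *   trim(GL_QUADS, 7)      -> 4   drop the trailing incomplete quad
 *   trim(GL_QUAD_STRIP, 5) -> 4   quad strips need an even vertex count
 *   trim(GL_QUAD_STRIP, 3) -> 0   too few vertices for even one quad
 *   trim(GL_TRIANGLES, 8)  -> 8   other primitive types pass through
 */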

static void brw_emit_prim(struct brw_context *brw,
                          const struct _mesa_prim *prim,
                          uint32_t hw_prim)
{
   struct intel_context *intel = &brw->intel;
   int verts_per_instance;
   int vertex_access_type;
   int start_vertex_location;
   int base_vertex_location;

   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
       prim->start, prim->count);

   start_vertex_location = prim->start;
   base_vertex_location = prim->basevertex;
   if (prim->indexed) {
      vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   verts_per_instance = trim(prim->mode, prim->count);

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the GPU besides the draw code.
    */
   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }

   BEGIN_BATCH(6);
   OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
             hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
             vertex_access_type);
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(0); // start instance location
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = true;

   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }
}

static void gen7_emit_prim(struct brw_context *brw,
                           const struct _mesa_prim *prim,
                           uint32_t hw_prim)
{
   struct intel_context *intel = &brw->intel;
   int verts_per_instance;
   int vertex_access_type;
   int start_vertex_location;
   int base_vertex_location;

   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
       prim->start, prim->count);

   start_vertex_location = prim->start;
   base_vertex_location = prim->basevertex;
   if (prim->indexed) {
      vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   verts_per_instance = trim(prim->mode, prim->count);

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the GPU besides the draw code.
    */
   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }

   BEGIN_BATCH(7);
   OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
   OUT_BATCH(hw_prim | vertex_access_type);
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(0); // start instance location
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }
}


static void brw_merge_inputs( struct brw_context *brw,
                              const struct gl_client_array *arrays[])
{
   struct brw_vertex_info old = brw->vb.info;
   GLuint i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      drm_intel_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   memset(&brw->vb.info, 0, sizeof(brw->vb.info));

   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
      brw->vb.inputs[i].buffer = -1;
      brw->vb.inputs[i].glarray = arrays[i];
      brw->vb.inputs[i].attrib = (gl_vert_attrib) i;

      if (arrays[i]->StrideB != 0)
         brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
            ((i%16) * 2);
   }

   /* Raise statechanges if input sizes have changed. */
   if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
      brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
}
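
/* Worked example of the size packing in brw_merge_inputs() above
 * (illustrative values only): attribute i = 17 with glarray->Size == 3
 * stores (3 - 1) = 2 in bits 2..3 of sizes[1], since 17 / 16 == 1 and
 * (17 % 16) * 2 == 2.
 */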

/**
 * \brief Resolve buffers before drawing.
 *
 * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
 * enabled depth texture.
 *
 * (In the future, this will also perform MSAA resolves.)
 */
static void
brw_predraw_resolve_buffers(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
   struct intel_renderbuffer *depth_irb;
   struct intel_texture_object *tex_obj;

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(intel, depth_irb);

   /* Resolve the depth buffer of each enabled depth texture. */
   for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
      if (!ctx->Texture.Unit[i]._ReallyEnabled)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
   }
}

/**
 * \brief Call this after drawing to mark which buffers need resolving.
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 */
static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   struct intel_renderbuffer *front_irb = NULL;
   struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);

   if (intel->is_front_buffer_rendering)
      front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);

   if (front_irb)
      intel_renderbuffer_set_needs_downsample(front_irb);
   if (back_irb)
      intel_renderbuffer_set_needs_downsample(back_irb);
   if (depth_irb && ctx->Depth.Mask)
      intel_renderbuffer_set_needs_depth_resolve(depth_irb);
}

/* Number of vertices consumed per primitive of the given type, for
 * transform feedback bookkeeping: strips, loops, fans, polygons and quads
 * are counted as the lines or triangles they decompose into.
 */
static int
verts_per_prim(GLenum mode)
{
   switch (mode) {
   case GL_POINTS:
      return 1;
   case GL_LINE_STRIP:
   case GL_LINE_LOOP:
   case GL_LINES:
      return 2;
   case GL_TRIANGLE_STRIP:
   case GL_TRIANGLE_FAN:
   case GL_POLYGON:
   case GL_TRIANGLES:
   case GL_QUADS:
   case GL_QUAD_STRIP:
      return 3;
   default:
      _mesa_problem(NULL,
                    "unknown prim type in transform feedback primitive count");
      return 0;
   }
}
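
/* Worked example of the SVBI 0 bookkeeping in brw_update_primitive_count()
 * below (illustrative values only): a draw of 50 GL_TRIANGLES primitives
 * with svbi_0_max_index = 100 and svbi_0_starting_index = 10 gives
 * verts = 3, space_avail = (100 - 10) / 3 = 30, primitives_written =
 * MIN2(30, 50) = 30, and svbi_0_starting_index advances by 3 * 30 = 90.
 */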

/**
 * Update internal counters based on the drawing operation described in
 * prim.
 */
static void
brw_update_primitive_count(struct brw_context *brw,
                           const struct _mesa_prim *prim)
{
   uint32_t count = count_tessellated_primitives(prim);
   brw->sol.primitives_generated += count;
   if (brw->intel.ctx.TransformFeedback.CurrentObject->Active &&
       !brw->intel.ctx.TransformFeedback.CurrentObject->Paused) {
      /* Update brw->sol.svbi_0_max_index to reflect the amount by which the
       * hardware is going to increment SVBI 0 when this drawing operation
       * occurs.  This is necessary because the kernel does not (yet) save and
       * restore GPU registers when context switching, so we'll need to be
       * able to reload SVBI 0 with the correct value in case we have to start
       * a new batch buffer.
       */
      unsigned verts = verts_per_prim(prim->mode);
      uint32_t space_avail =
         (brw->sol.svbi_0_max_index - brw->sol.svbi_0_starting_index) / verts;
      uint32_t primitives_written = MIN2(space_avail, count);
      brw->sol.svbi_0_starting_index += verts * primitives_written;

      /* And update the TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN query. */
      brw->sol.primitives_written += primitives_written;
   }
}

/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool brw_try_draw_prims( struct gl_context *ctx,
                                const struct gl_client_array *arrays[],
                                const struct _mesa_prim *prim,
                                GLuint nr_prims,
                                const struct _mesa_index_buffer *ib,
                                GLuint min_index,
                                GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   intel_prepare_render(intel);

   /* Resolves must occur after updating renderbuffers, updating context state,
    * and finalizing textures but before setting up any hardware state for
    * this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.
    *
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so we can't access it earlier.
    */

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
                                  (sizeof(struct brw_sampler_state) +
                                   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
      intel_batchbuffer_save_state(intel);

      brw->num_instances = prim[i].num_instances;
      if (intel->gen < 6)
         brw_set_prim(brw, &prim[i]);
      else
         gen6_set_prim(brw, &prim[i]);

   retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * the only state updated within the loop but outside this block is that
       * touched by *_set_prim() or intel_batchbuffer_flush(), which only
       * affects brw->state.dirty.brw.
       */
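      /* no_batch_wrap below keeps the batchbuffer from flushing in the
       * middle of the state upload or the primitive emit; if the batch then
       * fails the aperture check, we roll back to the saved batch state and
       * retry in a fresh batch instead.
       */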
      if (brw->state.dirty.brw) {
         intel->no_batch_wrap = true;
         brw_upload_state(brw);
      }

      if (intel->gen >= 7)
         gen7_emit_prim(brw, &prim[i], brw->primitive);
      else
         brw_emit_prim(brw, &prim[i], brw->primitive);

      intel->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(intel);
            intel_batchbuffer_flush(intel);
            fail_next = true;
            goto retry;
         } else {
            if (intel_batchbuffer_flush(intel) == -ENOSPC) {
               static bool warned = false;

               if (!warned) {
                  fprintf(stderr, "i965: Single primitive emit exceeded "
                          "available aperture space\n");
                  warned = true;
               }

               retval = false;
            }
         }
      }

      if (!_mesa_meta_in_progress(ctx))
         brw_update_primitive_count(brw, &prim[i]);
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
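
/**
 * The driver's main drawing entry point, registered with the vbo module in
 * brw_draw_init() below.  It handles conditional rendering, primitive
 * restart, index bounds and rebasing, and the swrast selection/feedback
 * fallback before handing off to brw_try_draw_prims().
 */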
void brw_draw_prims( struct gl_context *ctx,
                     const struct _mesa_prim *prim,
                     GLuint nr_prims,
                     const struct _mesa_index_buffer *ib,
                     GLboolean index_bounds_valid,
                     GLuint min_index,
                     GLuint max_index,
                     struct gl_transform_feedback_object *tfb_vertcount )
{
   const struct gl_client_array **arrays = ctx->Array._DrawArrays;

   if (!_mesa_check_conditional_render(ctx))
      return;

   /* Handle primitive restart if needed */
   if (brw_handle_primitive_restart(ctx, prim, nr_prims, ib)) {
      /* The draw was handled, so we can exit now */
      return;
   }

   if (!vbo_all_varyings_in_vbos(arrays)) {
      if (!index_bounds_valid)
         vbo_get_minmax_indices(ctx, prim, ib, &min_index, &max_index, nr_prims);

      /* Decide if we want to rebase.  If so we end up recursing once
       * only into this function.
       */
      if (min_index != 0 && !vbo_any_varyings_in_vbos(arrays)) {
         vbo_rebase_prims(ctx, arrays,
                          prim, nr_prims,
                          ib, min_index, max_index,
                          brw_draw_prims );
         return;
      }
   }

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
    */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_lookup_enum_by_nr(ctx->RenderMode));
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
      return;
   }

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}

void brw_draw_init( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct vbo_context *vbo = vbo_context(ctx);
   int i;

   /* Register our drawing function:
    */
   vbo->draw_prims = brw_draw_prims;

   for (i = 0; i < VERT_ATTRIB_MAX; i++)
      brw->vb.inputs[i].buffer = -1;
   brw->vb.nr_buffers = 0;
   brw->vb.nr_enabled = 0;
}

void brw_draw_destroy( struct brw_context *brw )
{
   int i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      drm_intel_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < brw->vb.nr_enabled; i++) {
      brw->vb.enabled[i]->buffer = -1;
   }
   brw->vb.nr_enabled = 0;

   drm_intel_bo_unreference(brw->ib.bo);
   brw->ib.bo = NULL;
}