1 /* 2 * Copyright 2003 VMware, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */

#include <sys/errno.h>

#include "main/context.h"
#include "main/condrender.h"
#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
#include "main/framebuffer.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "drivers/common/meta.h"
#include "util/bitscan.h"

#include "brw_blorp.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_vs.h"

#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_buffer_objects.h"

#define FILE_DEBUG_FLAG DEBUG_PRIMS


/* Map each GL primitive mode to its "reduced" class (points, lines, or
 * triangles); quads, quad strips, and polygons all rasterize as triangles.
 * Indexed directly by prim->mode, hence the GL_POLYGON+1 sizing.
 */
static const GLenum reduced_prim[GL_POLYGON+1] = {
   [GL_POINTS] = GL_POINTS,
   [GL_LINES] = GL_LINES,
   [GL_LINE_LOOP] = GL_LINES,
   [GL_LINE_STRIP] = GL_LINES,
   [GL_TRIANGLES] = GL_TRIANGLES,
   [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
   [GL_TRIANGLE_FAN] = GL_TRIANGLES,
   [GL_QUADS] = GL_TRIANGLES,
   [GL_QUAD_STRIP] = GL_TRIANGLES,
   [GL_POLYGON] = GL_TRIANGLES
};

/* When the primitive changes, set a state bit and re-validate.  Not
 * the nicest and would rather deal with this by having all the
 * programs be immune to the active primitive (ie. cope with all
 * possibilities).  That may not be realistic however.
76 */ 77 static void 78 brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) 79 { 80 struct gl_context *ctx = &brw->ctx; 81 uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode); 82 83 DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode)); 84 85 /* Slight optimization to avoid the GS program when not needed: 86 */ 87 if (prim->mode == GL_QUAD_STRIP && 88 ctx->Light.ShadeModel != GL_FLAT && 89 ctx->Polygon.FrontMode == GL_FILL && 90 ctx->Polygon.BackMode == GL_FILL) 91 hw_prim = _3DPRIM_TRISTRIP; 92 93 if (prim->mode == GL_QUADS && prim->count == 4 && 94 ctx->Light.ShadeModel != GL_FLAT && 95 ctx->Polygon.FrontMode == GL_FILL && 96 ctx->Polygon.BackMode == GL_FILL) { 97 hw_prim = _3DPRIM_TRIFAN; 98 } 99 100 if (hw_prim != brw->primitive) { 101 brw->primitive = hw_prim; 102 brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE; 103 104 if (reduced_prim[prim->mode] != brw->reduced_primitive) { 105 brw->reduced_primitive = reduced_prim[prim->mode]; 106 brw->ctx.NewDriverState |= BRW_NEW_REDUCED_PRIMITIVE; 107 } 108 } 109 } 110 111 static void 112 gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) 113 { 114 const struct gl_context *ctx = &brw->ctx; 115 uint32_t hw_prim; 116 117 DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode)); 118 119 if (prim->mode == GL_PATCHES) { 120 hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices); 121 } else { 122 hw_prim = get_hw_prim_for_gl_prim(prim->mode); 123 } 124 125 if (hw_prim != brw->primitive) { 126 brw->primitive = hw_prim; 127 brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE; 128 if (prim->mode == GL_PATCHES) 129 brw->ctx.NewDriverState |= BRW_NEW_PATCH_PRIMITIVE; 130 } 131 } 132 133 134 /** 135 * The hardware is capable of removing dangling vertices on its own; however, 136 * prior to Gen6, we sometimes convert quads into trifans (and quad strips 137 * into tristrips), since pre-Gen6 hardware requires a GS to render quads. 
 * This function manually trims dangling vertices from a draw call involving
 * quads so that those dangling vertices won't get drawn when we convert to
 * trifans/tristrips.
 */
static GLuint
trim(GLenum prim, GLuint length)
{
   if (prim == GL_QUAD_STRIP)
      /* A quad strip needs at least 4 verts; beyond that, drop any odd one. */
      return length > 3 ? (length - length % 2) : 0;
   else if (prim == GL_QUADS)
      /* Quads come in groups of 4; drop any incomplete trailing quad. */
      return length - length % 4;
   else
      return length;
}


/**
 * Emit a 3DPRIMITIVE command for one _mesa_prim, preceded by any
 * MI_LOAD_REGISTER_* commands needed to source the draw parameters
 * indirectly (from a transform feedback counter BO or from the bound
 * GL_DRAW_INDIRECT_BUFFER).
 */
static void
brw_emit_prim(struct brw_context *brw,
              const struct _mesa_prim *prim,
              uint32_t hw_prim,
              struct brw_transform_feedback_object *xfb_obj,
              unsigned stream)
{
   int verts_per_instance;
   int vertex_access_type;
   int indirect_flag;

   DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode),
       prim->start, prim->count);

   int start_vertex_location = prim->start;
   int base_vertex_location = prim->basevertex;

   if (prim->indexed) {
      /* RANDOM access = indexed draw; bias by where the index and vertex
       * data landed in the upload buffers.
       */
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (brw->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0 && !prim->is_indirect && !xfb_obj)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the machine besides the draw code.
    */
   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);

   /* If indirect, emit a bunch of loads from the indirect BO. */
   if (xfb_obj) {
      /* glDrawTransformFeedback: vertex count comes from the counter BO
       * written during the feedback pass; everything else is immediate.
       */
      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT,
                            xfb_obj->prim_count_bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            stream * sizeof(uint32_t));
      BEGIN_BATCH(9);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2));
      OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT);
      OUT_BATCH(prim->num_instances);
      OUT_BATCH(GEN7_3DPRIM_START_VERTEX);
      OUT_BATCH(0);
      OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
      OUT_BATCH(0);
      OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else if (prim->is_indirect) {
      struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
            intel_buffer_object(indirect_buffer),
            prim->indirect_offset, 5 * sizeof(GLuint));

      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      /* Load the 3DPRIM_* registers straight from the indirect buffer's
       * {count, instanceCount, first, baseVertex/baseInstance, ...} layout.
       */
      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 0);
      brw_load_register_mem(brw, GEN7_3DPRIM_INSTANCE_COUNT, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 4);

      brw_load_register_mem(brw, GEN7_3DPRIM_START_VERTEX, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 8);
      if (prim->indexed) {
         brw_load_register_mem(brw, GEN7_3DPRIM_BASE_VERTEX, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 12);
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 16);
      } else {
         /* Non-indexed indirect draws have no baseVertex field; zero the
          * register explicitly so stale values don't leak in.
          */
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 12);
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   } else {
      indirect_flag = 0;
   }

   BEGIN_BATCH(brw->gen >= 7 ? 7 : 6);

   if (brw->gen >= 7) {
      const int predicate_enable =
         (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
         ? GEN7_3DPRIM_PREDICATE_ENABLE : 0;

      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
      OUT_BATCH(hw_prim | vertex_access_type);
   } else {
      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                vertex_access_type);
   }
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);
}


/**
 * Bind the vertex arrays for this draw: drop the previous draw's vertex
 * buffer references, point each input slot at its gl_vertex_array, and
 * (pre-Haswell) recompute the VS attribute workaround flags for formats
 * the hardware can't fetch natively.
 */
static void
brw_merge_inputs(struct brw_context *brw,
                 const struct gl_vertex_array *arrays[])
{
   const struct gl_context *ctx = &brw->ctx;
   GLuint i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      drm_intel_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
      brw->vb.inputs[i].buffer = -1;
      brw->vb.inputs[i].glarray = arrays[i];
   }

   if (brw->gen < 8 && !brw->is_haswell) {
      uint64_t mask = ctx->VertexProgram._Current->info.inputs_read;
      /* Prior to Haswell, the hardware can't natively support GL_FIXED or
       * 2_10_10_10_REV vertex formats.  Set appropriate workaround flags.
       */
      while (mask) {
         uint8_t wa_flags = 0;

         i = u_bit_scan64(&mask);

         switch (brw->vb.inputs[i].glarray->Type) {

         case GL_FIXED:
            /* For GL_FIXED the workaround flag encodes the component count. */
            wa_flags = brw->vb.inputs[i].glarray->Size;
            break;

         case GL_INT_2_10_10_10_REV:
            wa_flags |= BRW_ATTRIB_WA_SIGN;
            /* fall through */

         case GL_UNSIGNED_INT_2_10_10_10_REV:
            if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
               wa_flags |= BRW_ATTRIB_WA_BGRA;

            if (brw->vb.inputs[i].glarray->Normalized)
               wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
            else if (!brw->vb.inputs[i].glarray->Integer)
               wa_flags |= BRW_ATTRIB_WA_SCALE;

            break;
         }

         /* Only flag re-upload when the workaround actually changed. */
         if (brw->vb.attrib_wa_flags[i] != wa_flags) {
            brw->vb.attrib_wa_flags[i] = wa_flags;
            brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS;
         }
      }
   }
}

/**
 * \brief Call this after drawing to mark which buffers need resolving
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 *
 * Also mark any render targets which will be textured as needing a render
 * cache flush.
 */
static void
brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   struct intel_renderbuffer *front_irb = NULL;
   struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];

   if (_mesa_is_front_buffer_drawing(fb))
      front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);

   if (front_irb)
      front_irb->need_downsample = true;
   if (back_irb)
      back_irb->need_downsample = true;
   if (depth_irb && brw_depth_writes_enabled(brw)) {
      intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
      brw_render_cache_set_add_bo(brw, depth_irb->mt->bo);
   }

   /* Stencil writes also dirty the render cache when it may be sampled
    * later via ARB_stencil_texturing.
    */
   if (ctx->Extensions.ARB_stencil_texturing &&
       stencil_irb && ctx->Stencil._WriteEnabled) {
      brw_render_cache_set_add_bo(brw, stencil_irb->mt->bo);
   }

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (!irb)
         continue;

      brw_render_cache_set_add_bo(brw, irb->mt->bo);
      intel_miptree_used_for_rendering(
         brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count);
   }
}

/**
 * Pre-draw fixups for auxiliary (fast-clear/CCS) surfaces; Gen9+ only.
 */
static void
brw_predraw_set_aux_buffers(struct brw_context *brw)
{
   if (brw->gen < 9)
      return;

   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (!irb) {
         continue;
      }

      /* For layered rendering non-compressed fast cleared buffers need to be
       * resolved. Surface state can carry only one fast color clear value
       * while each layer may have its own fast clear color value. For
       * compressed buffers color value is available in the color buffer.
       */
      if (irb->layer_count > 1 &&
          !(irb->mt->aux_disable & INTEL_AUX_DISABLE_CCS) &&
          !intel_miptree_is_lossless_compressed(brw, irb->mt)) {
         assert(brw->gen >= 8);

         intel_miptree_resolve_color(brw, irb->mt, irb->mt_level,
                                     irb->mt_layer, irb->layer_count, 0);
      }
   }
}

/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_vertex_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   bool index_bounds_valid,
                   GLuint min_index,
                   GLuint max_index,
                   struct brw_transform_feedback_object *xfb_obj,
                   unsigned stream,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      util_last_bit(ctx->FragmentProgram._Current->SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      util_last_bit(ctx->GeometryProgram._Current->SamplersUsed) : 0;
   brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ?
      util_last_bit(ctx->TessEvalProgram._Current->SamplersUsed) : 0;
   brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ?
      util_last_bit(ctx->TessCtrlProgram._Current->SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      util_last_bit(ctx->VertexProgram._Current->SamplersUsed);

   intel_prepare_render(brw);
   brw_predraw_set_aux_buffers(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.index_bounds_valid = index_bounds_valid;
   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;
      const int sampler_state_size = 16;

      /* Worst-case batch space one primitive's state + command emit might
       * need; used to decide whether to flush before emitting.
       */
      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances ||
          brw->basevertex != prims[i].basevertex ||
          brw->baseinstance != prims[i].base_instance) {
         brw->num_instances = prims[i].num_instances;
         brw->basevertex = prims[i].basevertex;
         brw->baseinstance = prims[i].base_instance;
         if (i > 0) { /* For i == 0 we just did this before the loop */
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
            brw_merge_inputs(brw, arrays);
         }
      }

      /* Determine if we need to flag BRW_NEW_VERTICES for updating the
       * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we
       * always flag if the shader uses one of the values. For direct draws,
       * we only flag if the values change.
       */
      const int new_basevertex =
         prims[i].indexed ? prims[i].basevertex : prims[i].start;
      const int new_baseinstance = prims[i].base_instance;
      const struct brw_vs_prog_data *vs_prog_data =
         brw_vs_prog_data(brw->vs.base.prog_data);
      if (i > 0) {
         /* vs_prog_data is only valid after the first iteration's state
          * upload; i == 0 already has BRW_NEW_VERTICES set above.
          */
         const bool uses_draw_parameters =
            vs_prog_data->uses_basevertex ||
            vs_prog_data->uses_baseinstance;

         if ((uses_draw_parameters && prims[i].is_indirect) ||
             (vs_prog_data->uses_basevertex &&
              brw->draw.params.gl_basevertex != new_basevertex) ||
             (vs_prog_data->uses_baseinstance &&
              brw->draw.params.gl_baseinstance != new_baseinstance))
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
      }

      brw->draw.params.gl_basevertex = new_basevertex;
      brw->draw.params.gl_baseinstance = new_baseinstance;
      drm_intel_bo_unreference(brw->draw.draw_params_bo);

      if (prims[i].is_indirect) {
         /* Point draw_params_bo at the indirect buffer. */
         brw->draw.draw_params_bo =
            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
         drm_intel_bo_reference(brw->draw.draw_params_bo);
         brw->draw.draw_params_offset =
            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
      } else {
         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
          * has to upload gl_BaseVertex and such if they're needed.
          */
         brw->draw.draw_params_bo = NULL;
         brw->draw.draw_params_offset = 0;
      }

      /* gl_DrawID always needs its own vertex buffer since it's not part of
       * the indirect parameter buffer. If the program uses gl_DrawID we need
       * to flag BRW_NEW_VERTICES. For the first iteration, we don't have
       * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
       * the loop.
       */
      brw->draw.gl_drawid = prims[i].draw_id;
      drm_intel_bo_unreference(brw->draw.draw_id_bo);
      brw->draw.draw_id_bo = NULL;
      if (i > 0 && vs_prog_data->uses_drawid)
         brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

      if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
      else
         gen6_set_prim(brw, &prims[i]);

retry:

      /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->ctx.NewDriverState.
       */
      if (brw->ctx.NewDriverState) {
         brw->no_batch_wrap = true;
         brw_upload_render_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream);

      brw->no_batch_wrap = false;

      /* If this primitive blew past the aperture, roll back to the saved
       * batch state, flush, and retry once with an empty batch.
       */
      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(brw);
            intel_batchbuffer_flush(brw);
            fail_next = true;
            goto retry;
         } else {
            int ret = intel_batchbuffer_flush(brw);
            WARN_ONCE(ret == -ENOSPC,
                      "i965: Single primitive emit exceeded "
                      "available aperture space\n");
         }
      }

      /* Now that we know we haven't run out of aperture space, we can safely
       * reset the dirty bits.
       */
      if (brw->ctx.NewDriverState)
         brw_render_state_finished(brw);
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_program_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return;
}

/**
 * Top-level vbo draw entry point registered in brw_draw_init().
 */
void
brw_draw_prims(struct gl_context *ctx,
               const struct _mesa_prim *prims,
               GLuint nr_prims,
               const struct _mesa_index_buffer *ib,
               GLboolean index_bounds_valid,
               GLuint min_index,
               GLuint max_index,
               struct gl_transform_feedback_object *gl_xfb_obj,
               unsigned stream,
               struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
   struct brw_transform_feedback_object *xfb_obj =
      (struct brw_transform_feedback_object *) gl_xfb_obj;

   if (!brw_check_conditional_render(brw))
      return;

   /* Handle primitive restart if needed */
   if (brw_handle_primitive_restart(ctx, prims, nr_prims, ib, indirect)) {
      /* The draw was handled, so we can exit now */
      return;
   }

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
 */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_enum_to_string(ctx->RenderMode));
      /* Wake up swrast/tnl so the software pipeline can take this draw. */
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, prims, nr_prims, ib,
                      index_bounds_valid, min_index, max_index, NULL, 0, NULL);
      return;
   }

   /* If we're going to have to upload any of the user's vertex arrays, then
    * get the minimum and maximum of their index buffer so we know what range
    * to upload.
    */
   if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
      perf_debug("Scanning index buffer to compute index buffer bounds. "
                 "Use glDrawRangeElements() to avoid this.\n");
      vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
      index_bounds_valid = true;
   }

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, index_bounds_valid,
                      min_index, max_index, xfb_obj, stream, indirect);
}

/**
 * One-time draw-module setup: hook our draw entry point into the vbo
 * module and reset the vertex buffer bookkeeping.
 */
void
brw_draw_init(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct vbo_context *vbo = vbo_context(ctx);

   /* Register our drawing function:
    */
   vbo->draw_prims = brw_draw_prims;

   /* -1 marks an input as having no vertex buffer bound yet. */
   for (int i = 0; i < VERT_ATTRIB_MAX; i++)
      brw->vb.inputs[i].buffer = -1;
   brw->vb.nr_buffers = 0;
   brw->vb.nr_enabled = 0;
}

/**
 * Release all BO references held by the draw module (vertex buffers and
 * the index buffer) at context teardown.
 */
void
brw_draw_destroy(struct brw_context *brw)
{
   unsigned i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      drm_intel_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < brw->vb.nr_enabled; i++) {
      brw->vb.enabled[i]->buffer = -1;
   }
   brw->vb.nr_enabled = 0;

   drm_intel_bo_unreference(brw->ib.bo);
   brw->ib.bo = NULL;
}