/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <sys/errno.h>

#include "main/blend.h"
#include "main/context.h"
#include "main/condrender.h"
#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
#include "main/framebuffer.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "drivers/common/meta.h"
#include "util/bitscan.h"
#include "util/bitset.h"

#include "brw_blorp.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "compiler/brw_eu_defines.h"
#include "brw_context.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_buffer_objects.h"

#define FILE_DEBUG_FLAG DEBUG_PRIMS


static const GLenum reduced_prim[GL_POLYGON+1] = {
   [GL_POINTS] = GL_POINTS,
   [GL_LINES] = GL_LINES,
   [GL_LINE_LOOP] = GL_LINES,
   [GL_LINE_STRIP] = GL_LINES,
   [GL_TRIANGLES] = GL_TRIANGLES,
   [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
   [GL_TRIANGLE_FAN] = GL_TRIANGLES,
   [GL_QUADS] = GL_TRIANGLES,
   [GL_QUAD_STRIP] = GL_TRIANGLES,
   [GL_POLYGON] = GL_TRIANGLES
};
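/* Illustrative note (not part of the original source): the table above
 * collapses every GL primitive into its "reduced" class, e.g.
 *
 *    reduced_prim[GL_LINE_STRIP] == GL_LINES
 *    reduced_prim[GL_QUAD_STRIP] == GL_TRIANGLES
 *
 * so state that only cares about point/line/triangle-ness (flagged via
 * BRW_NEW_REDUCED_PRIMITIVE below) is not re-emitted when, say, a draw
 * switches from triangle strips to triangle fans.
 */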
/* When the primitive changes, set a state bit and re-validate.  This is not
 * the nicest approach; we would rather make every program immune to the
 * active primitive (i.e. cope with all possibilities), but that may not be
 * realistic.
 */
static void
brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);

   DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));

   /* Slight optimization to avoid the GS program when not needed:
    */
   if (prim->mode == GL_QUAD_STRIP &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL)
      hw_prim = _3DPRIM_TRISTRIP;

   if (prim->mode == GL_QUADS && prim->count == 4 &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL) {
      hw_prim = _3DPRIM_TRIFAN;
   }

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;

      if (reduced_prim[prim->mode] != brw->reduced_primitive) {
         brw->reduced_primitive = reduced_prim[prim->mode];
         brw->ctx.NewDriverState |= BRW_NEW_REDUCED_PRIMITIVE;
      }
   }
}

static void
gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
{
   const struct gl_context *ctx = &brw->ctx;
   uint32_t hw_prim;

   DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));

   if (prim->mode == GL_PATCHES) {
      hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices);
   } else {
      hw_prim = get_hw_prim_for_gl_prim(prim->mode);
   }

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;
      if (prim->mode == GL_PATCHES)
         brw->ctx.NewDriverState |= BRW_NEW_PATCH_PRIMITIVE;
   }
}


/**
 * The hardware is capable of removing dangling vertices on its own; however,
 * prior to Gen6, we sometimes convert quads into trifans (and quad strips
 * into tristrips), since pre-Gen6 hardware requires a GS to render quads.
 * This function manually trims dangling vertices from a draw call involving
 * quads so that those dangling vertices won't get drawn when we convert to
 * trifans/tristrips.
 */
static GLuint
trim(GLenum prim, GLuint length)
{
   if (prim == GL_QUAD_STRIP)
      return length > 3 ? (length - length % 2) : 0;
   else if (prim == GL_QUADS)
      return length - length % 4;
   else
      return length;
}
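/* Worked examples for trim() (illustrative, not from the original source):
 *
 *    trim(GL_QUADS, 7)      -> 4   (7 - 7 % 4: one incomplete quad dropped)
 *    trim(GL_QUAD_STRIP, 5) -> 4   (5 - 5 % 2: one dangling vertex dropped)
 *    trim(GL_QUAD_STRIP, 3) -> 0   (fewer than 4 vertices: no complete quad)
 *    trim(GL_TRIANGLES, 8)  -> 8   (non-quad primitives pass through)
 */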

static void
brw_emit_prim(struct brw_context *brw,
              const struct _mesa_prim *prim,
              uint32_t hw_prim,
              struct brw_transform_feedback_object *xfb_obj,
              unsigned stream)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   int verts_per_instance;
   int vertex_access_type;
   int indirect_flag;

   DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode),
       prim->start, prim->count);

   int start_vertex_location = prim->start;
   int base_vertex_location = prim->basevertex;

   if (prim->indexed) {
      vertex_access_type = devinfo->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = devinfo->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (devinfo->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0 && !prim->is_indirect && !xfb_obj)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the pipeline besides the draw code.
    */
   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);

   /* If indirect, emit a bunch of loads from the indirect BO. */
   if (xfb_obj) {
      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT,
                            xfb_obj->prim_count_bo,
                            stream * sizeof(uint32_t));
      BEGIN_BATCH(9);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2));
      OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT);
      OUT_BATCH(prim->num_instances);
      OUT_BATCH(GEN7_3DPRIM_START_VERTEX);
      OUT_BATCH(0);
      OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
      OUT_BATCH(0);
      OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else if (prim->is_indirect) {
      struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
      struct brw_bo *bo = intel_bufferobj_buffer(brw,
                                                 intel_buffer_object(indirect_buffer),
                                                 prim->indirect_offset, 5 * sizeof(GLuint), false);

      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT, bo,
                            prim->indirect_offset + 0);
      brw_load_register_mem(brw, GEN7_3DPRIM_INSTANCE_COUNT, bo,
                            prim->indirect_offset + 4);

      brw_load_register_mem(brw, GEN7_3DPRIM_START_VERTEX, bo,
                            prim->indirect_offset + 8);
      if (prim->indexed) {
         brw_load_register_mem(brw, GEN7_3DPRIM_BASE_VERTEX, bo,
                               prim->indirect_offset + 12);
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               prim->indirect_offset + 16);
      } else {
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               prim->indirect_offset + 12);
         brw_load_register_imm32(brw, GEN7_3DPRIM_BASE_VERTEX, 0);
      }
   } else {
      indirect_flag = 0;
   }
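
   /* For reference (per the GL indirect draw specs; this note is not in the
    * original file): the register offsets above follow the layout of the
    * indirect draw commands,
    *
    *    typedef struct {
    *       GLuint count;           // +0  -> 3DPRIM_VERTEX_COUNT
    *       GLuint instanceCount;   // +4  -> 3DPRIM_INSTANCE_COUNT
    *       GLuint first;           // +8  -> 3DPRIM_START_VERTEX
    *       GLuint baseInstance;    // +12 -> 3DPRIM_START_INSTANCE
    *    } DrawArraysIndirectCommand;
    *
    *    typedef struct {
    *       GLuint count;           // +0  -> 3DPRIM_VERTEX_COUNT
    *       GLuint instanceCount;   // +4  -> 3DPRIM_INSTANCE_COUNT
    *       GLuint firstIndex;      // +8  -> 3DPRIM_START_VERTEX
    *       GLint  baseVertex;      // +12 -> 3DPRIM_BASE_VERTEX
    *       GLuint baseInstance;    // +16 -> 3DPRIM_START_INSTANCE
    *    } DrawElementsIndirectCommand;
    *
    * which is why the non-indexed path loads START_INSTANCE from +12 and
    * zeroes BASE_VERTEX with an immediate instead.
    */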

   BEGIN_BATCH(devinfo->gen >= 7 ? 7 : 6);

   if (devinfo->gen >= 7) {
      const int predicate_enable =
         (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
         ? GEN7_3DPRIM_PREDICATE_ENABLE : 0;

      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
      OUT_BATCH(hw_prim | vertex_access_type);
   } else {
      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                vertex_access_type);
   }
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);
}


static void
brw_merge_inputs(struct brw_context *brw,
                 const struct gl_vertex_array *arrays[])
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;
   GLuint i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      brw_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
      brw->vb.inputs[i].buffer = -1;
      brw->vb.inputs[i].glarray = arrays[i];
   }

   if (devinfo->gen < 8 && !devinfo->is_haswell) {
      uint64_t mask = ctx->VertexProgram._Current->info.inputs_read;
      /* Prior to Haswell, the hardware can't natively support GL_FIXED or
       * 2_10_10_10_REV vertex formats.  Set appropriate workaround flags.
       */
      while (mask) {
         uint8_t wa_flags = 0;

         i = u_bit_scan64(&mask);

         switch (brw->vb.inputs[i].glarray->Type) {
         case GL_FIXED:
            wa_flags = brw->vb.inputs[i].glarray->Size;
            break;

         case GL_INT_2_10_10_10_REV:
            wa_flags |= BRW_ATTRIB_WA_SIGN;
            /* fallthrough */

         case GL_UNSIGNED_INT_2_10_10_10_REV:
            if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
               wa_flags |= BRW_ATTRIB_WA_BGRA;

            if (brw->vb.inputs[i].glarray->Normalized)
               wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
            else if (!brw->vb.inputs[i].glarray->Integer)
               wa_flags |= BRW_ATTRIB_WA_SCALE;

            break;
         }

         if (brw->vb.attrib_wa_flags[i] != wa_flags) {
            brw->vb.attrib_wa_flags[i] = wa_flags;
            brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS;
         }
      }
   }
}
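
/* Example (illustrative; the API call is standard GL and the flag values
 * follow from the switch above): an application attribute set up with
 *
 *    glVertexAttribPointer(loc, 4, GL_INT_2_10_10_10_REV, GL_TRUE, 0, 0);
 *
 * gets wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_NORMALIZE on
 * pre-Haswell hardware, so the vertex shader is patched to sign-extend and
 * normalize the packed components itself.
 */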

/* Disable auxiliary buffers if a renderbuffer is also bound as a texture
 * or shader image.  This causes a self-dependency, where both rendering
 * and sampling may concurrently read or write the CCS buffer, causing
 * incorrect pixels.
 */
static bool
intel_disable_rb_aux_buffer(struct brw_context *brw,
                            bool *draw_aux_buffer_disabled,
                            struct intel_mipmap_tree *tex_mt,
                            unsigned min_level, unsigned num_levels,
                            const char *usage)
{
   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
   bool found = false;

   /* We only need to worry about color compression and fast clears. */
   if (tex_mt->aux_usage != ISL_AUX_USAGE_CCS_D &&
       tex_mt->aux_usage != ISL_AUX_USAGE_CCS_E)
      return false;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      const struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb && irb->mt->bo == tex_mt->bo &&
          irb->mt_level >= min_level &&
          irb->mt_level < min_level + num_levels) {
         found = draw_aux_buffer_disabled[i] = true;
      }
   }

   if (found) {
      perf_debug("Disabling CCS because a renderbuffer is also bound %s.\n",
                 usage);
   }

   return found;
}

static void
mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
                           const struct gl_program *prog)
{
   if (!prog)
      return;

   unsigned mask = prog->SamplersUsed & prog->info.textures_used_by_txf;
   while (mask) {
      int s = u_bit_scan(&mask);
      BITSET_SET(used_for_txf, prog->SamplerUnits[s]);
   }
}
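
/* Illustrative note (not from the original source): textures_used_by_txf is
 * gathered by the shader compiler, so a GLSL fetch such as
 *
 *    vec4 texel = texelFetch(tex, ivec2(gl_FragCoord.xy), 0);
 *
 * marks tex's sampler unit here, while ordinary texture() sampling does not.
 */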

/**
 * \brief Resolve buffers before drawing.
 *
 * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
 * enabled depth texture, and flush the render cache for any dirty textures.
 */
void
brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
                           bool *draw_aux_buffer_disabled)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_texture_object *tex_obj;

   BITSET_DECLARE(used_for_txf, MAX_COMBINED_TEXTURE_IMAGE_UNITS);
   memset(used_for_txf, 0, sizeof(used_for_txf));
   if (rendering) {
      mark_textures_used_for_txf(used_for_txf, ctx->VertexProgram._Current);
      mark_textures_used_for_txf(used_for_txf, ctx->TessCtrlProgram._Current);
      mark_textures_used_for_txf(used_for_txf, ctx->TessEvalProgram._Current);
      mark_textures_used_for_txf(used_for_txf, ctx->GeometryProgram._Current);
      mark_textures_used_for_txf(used_for_txf, ctx->FragmentProgram._Current);
   } else {
      mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current);
   }

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i);
      enum isl_format view_format =
         translate_tex_format(brw, tex_obj->_Format, sampler->sRGBDecode);

      unsigned min_level, min_layer, num_levels, num_layers;
      if (tex_obj->base.Immutable) {
         min_level = tex_obj->base.MinLevel;
         num_levels = MIN2(tex_obj->base.NumLevels, tex_obj->_MaxLevel + 1);
         min_layer = tex_obj->base.MinLayer;
         num_layers = tex_obj->base.Target != GL_TEXTURE_3D ?
                      tex_obj->base.NumLayers : INTEL_REMAINING_LAYERS;
      } else {
         min_level = tex_obj->base.BaseLevel;
         num_levels = tex_obj->_MaxLevel - tex_obj->base.BaseLevel + 1;
         min_layer = 0;
         num_layers = INTEL_REMAINING_LAYERS;
      }

      const bool disable_aux = rendering &&
         intel_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled,
                                     tex_obj->mt, min_level, num_levels,
                                     "for sampling");

      intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
                                    min_level, num_levels,
                                    min_layer, num_layers,
                                    disable_aux);

      /* If any programs are using it with texelFetch, we may need to also do
       * a prepare with an sRGB format to ensure texelFetch works "properly".
       */
      if (BITSET_TEST(used_for_txf, i)) {
         enum isl_format txf_format =
            translate_tex_format(brw, tex_obj->_Format, GL_DECODE_EXT);
         if (txf_format != view_format) {
            intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
                                          min_level, num_levels,
                                          min_layer, num_layers,
                                          disable_aux);
         }
      }

      brw_cache_flush_for_read(brw, tex_obj->mt->bo);

      if (tex_obj->base.StencilSampling ||
          tex_obj->mt->format == MESA_FORMAT_S_UINT8) {
         intel_update_r8stencil(brw, tex_obj->mt);
      }
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_program *prog = ctx->_Shader->CurrentProgram[i];

      if (unlikely(prog && prog->info.num_images)) {
         for (unsigned j = 0; j < prog->info.num_images; j++) {
            struct gl_image_unit *u =
               &ctx->ImageUnits[prog->sh.ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               if (rendering) {
                  intel_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled,
                                              tex_obj->mt, 0, ~0,
                                              "as a shader image");
               }

               intel_miptree_prepare_image(brw, tex_obj->mt);

               brw_cache_flush_for_read(brw, tex_obj->mt->bo);
            }
         }
      }
   }
}
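
/* Example (illustrative; the GLSL is standard, the behavior follows from the
 * loop above): a shader declaring and storing to an image,
 *
 *    layout(rgba8) uniform image2D img;
 *    ...
 *    imageStore(img, coord, value);
 *
 * contributes to prog->info.num_images, so the bound texture gets a full
 * color resolve (intel_miptree_prepare_image) before the draw or dispatch.
 */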

static void
brw_predraw_resolve_framebuffer(struct brw_context *brw,
                                bool *draw_aux_buffer_disabled)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *depth_irb;

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb && depth_irb->mt) {
      intel_miptree_prepare_depth(brw, depth_irb->mt,
                                  depth_irb->mt_level,
                                  depth_irb->mt_layer,
                                  depth_irb->layer_count);
   }

   /* Resolve color buffers for non-coherent framebuffer fetch. */
   if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
       ctx->FragmentProgram._Current &&
       ctx->FragmentProgram._Current->info.outputs_read) {
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      /* This is only used for non-coherent framebuffer fetch, so we don't
       * need to worry about CCS_E and can simply pass 'false' below.
       */
      assert(brw->screen->devinfo.gen < 9);

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         const struct intel_renderbuffer *irb =
            intel_renderbuffer(fb->_ColorDrawBuffers[i]);

         if (irb) {
            intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format,
                                          irb->mt_level, 1,
                                          irb->mt_layer, irb->layer_count,
                                          false);
         }
      }
   }

   struct gl_framebuffer *fb = ctx->DrawBuffer;
   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb == NULL || irb->mt == NULL)
         continue;

      mesa_format mesa_format =
         _mesa_get_render_format(ctx, intel_rb_format(irb));
      enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
      bool blend_enabled = ctx->Color.BlendEnabled & (1 << i);
      enum isl_aux_usage aux_usage =
         intel_miptree_render_aux_usage(brw, irb->mt, isl_format,
                                        blend_enabled,
                                        draw_aux_buffer_disabled[i]);
      if (brw->draw_aux_usage[i] != aux_usage) {
         brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
         brw->draw_aux_usage[i] = aux_usage;
      }

      intel_miptree_prepare_render(brw, irb->mt, irb->mt_level,
                                   irb->mt_layer, irb->layer_count,
                                   aux_usage);

      brw_cache_flush_for_render(brw, irb->mt->bo,
                                 isl_format, aux_usage);
   }
}
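
/* Illustrative note (not from the original source): outputs_read becomes
 * non-zero when a fragment shader reads one of its own outputs, e.g. an
 * inout output with the framebuffer fetch extensions:
 *
 *    inout highp vec4 color;
 *    ...
 *    color.rgb = mix(color.rgb, src.rgb, src.a);
 *
 * Since the fetch path here is non-coherent, the color buffers must be
 * resolved and flushed up front, as done above.
 */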

/**
 * \brief Call this after drawing to mark which buffers need resolving.
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 *
 * Also mark any render targets which will be textured as needing a render
 * cache flush.
 */
static void
brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   struct intel_renderbuffer *front_irb = NULL;
   struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];

   if (_mesa_is_front_buffer_drawing(fb))
      front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);

   if (front_irb)
      front_irb->need_downsample = true;
   if (back_irb)
      back_irb->need_downsample = true;
   if (depth_irb) {
      bool depth_written = brw_depth_writes_enabled(brw);
      if (depth_att->Layered) {
         intel_miptree_finish_depth(brw, depth_irb->mt,
                                    depth_irb->mt_level,
                                    depth_irb->mt_layer,
                                    depth_irb->layer_count,
                                    depth_written);
      } else {
         intel_miptree_finish_depth(brw, depth_irb->mt,
                                    depth_irb->mt_level,
                                    depth_irb->mt_layer, 1,
                                    depth_written);
      }
      if (depth_written)
         brw_depth_cache_add_bo(brw, depth_irb->mt->bo);
   }

   if (stencil_irb && brw->stencil_write_enabled)
      brw_depth_cache_add_bo(brw, stencil_irb->mt->bo);

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (!irb)
         continue;

      mesa_format mesa_format =
         _mesa_get_render_format(ctx, intel_rb_format(irb));
      enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
      enum isl_aux_usage aux_usage = brw->draw_aux_usage[i];

      brw_render_cache_add_bo(brw, irb->mt->bo, isl_format, aux_usage);

      intel_miptree_finish_render(brw, irb->mt, irb->mt_level,
                                  irb->mt_layer, irb->layer_count,
                                  aux_usage);
   }
}

static void
intel_renderbuffer_move_temp_back(struct brw_context *brw,
                                  struct intel_renderbuffer *irb)
{
   if (irb->align_wa_mt == NULL)
      return;

   brw_cache_flush_for_read(brw, irb->align_wa_mt->bo);

   intel_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0,
                            irb->mt,
                            irb->Base.Base.TexImage->Level, irb->mt_layer);

   intel_miptree_reference(&irb->align_wa_mt, NULL);

   /* Finally restore the x,y to correspond to full miptree. */
   intel_renderbuffer_set_draw_offset(irb);

   /* Make sure render surface state gets re-emitted with updated miptree. */
   brw->NewGLState |= _NEW_BUFFERS;
}

static void
brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   struct intel_renderbuffer *depth_irb =
      intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb =
      intel_get_renderbuffer(fb, BUFFER_STENCIL);

   if (depth_irb && depth_irb->align_wa_mt)
      intel_renderbuffer_move_temp_back(brw, depth_irb);

   if (stencil_irb && stencil_irb->align_wa_mt)
      intel_renderbuffer_move_temp_back(brw, stencil_irb);

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (!irb || irb->align_wa_mt == NULL)
         continue;

      intel_renderbuffer_move_temp_back(brw, irb);
   }
}

static void
brw_prepare_drawing(struct gl_context *ctx,
                    const struct gl_vertex_array *arrays[],
                    const struct _mesa_index_buffer *ib,
                    bool index_bounds_valid,
                    GLuint min_index,
                    GLuint max_index)
{
   struct brw_context *brw = brw_context(ctx);

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      util_last_bit(ctx->FragmentProgram._Current->SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      util_last_bit(ctx->GeometryProgram._Current->SamplersUsed) : 0;
   brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ?
      util_last_bit(ctx->TessEvalProgram._Current->SamplersUsed) : 0;
   brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ?
      util_last_bit(ctx->TessCtrlProgram._Current->SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      util_last_bit(ctx->VertexProgram._Current->SamplersUsed);

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Resolves must occur after updating renderbuffers, updating context state,
    * and finalizing textures but before setting up any hardware state for
    * this draw call.
    */
   bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS] = { };
   brw_predraw_resolve_inputs(brw, true, draw_aux_buffer_disabled);
   brw_predraw_resolve_framebuffer(brw, draw_aux_buffer_disabled);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.index_bounds_valid = index_bounds_valid;
   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
}

static void
brw_finish_drawing(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_program_cache_check_size(brw);
   brw_postdraw_reconcile_align_wa_slices(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   if (brw->draw.draw_params_count_bo) {
      brw_bo_unreference(brw->draw.draw_params_count_bo);
      brw->draw.draw_params_count_bo = NULL;
   }
}
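
/* Illustrative note (not from the original source): draw_params_count_bo is
 * the ARB_indirect_parameters draw-count buffer.  It is referenced in
 * brw_draw_indirect_prims(), consulted per primitive in brw_draw_prims() to
 * program the MI_PREDICATE, and released above once the whole draw finishes.
 */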

/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_draw_single_prim(struct gl_context *ctx,
                     const struct gl_vertex_array *arrays[],
                     const struct _mesa_prim *prim,
                     unsigned prim_id,
                     struct brw_transform_feedback_object *xfb_obj,
                     unsigned stream,
                     struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   bool fail_next = false;

   /* Flag BRW_NEW_DRAW_CALL on every draw.  This allows us to have
    * atoms that happen on every draw call.
    */
   brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL;

   /* Flush the batch if the batch/state buffers are nearly full.  We can
    * grow them if needed, but this is not free, so we'd like to avoid it.
    */
   intel_batchbuffer_require_space(brw, 1500, RENDER_RING);
   brw_require_statebuffer_space(brw, 2400);
   intel_batchbuffer_save_state(brw);

   if (brw->num_instances != prim->num_instances ||
       brw->basevertex != prim->basevertex ||
       brw->baseinstance != prim->base_instance) {
      brw->num_instances = prim->num_instances;
      brw->basevertex = prim->basevertex;
      brw->baseinstance = prim->base_instance;
      if (prim_id > 0) { /* For i == 0 we just did this before the loop */
         brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }
   }

   /* Determine if we need to flag BRW_NEW_VERTICES for updating the
    * gl_BaseVertexARB or gl_BaseInstanceARB values.  For indirect draws, we
    * always flag if the shader uses one of the values.  For direct draws,
    * we only flag if the values change.
    */
   const int new_basevertex =
      prim->indexed ? prim->basevertex : prim->start;
   const int new_baseinstance = prim->base_instance;
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   if (prim_id > 0) {
      const bool uses_draw_parameters =
         vs_prog_data->uses_basevertex ||
         vs_prog_data->uses_baseinstance;

      if ((uses_draw_parameters && prim->is_indirect) ||
          (vs_prog_data->uses_basevertex &&
           brw->draw.params.gl_basevertex != new_basevertex) ||
          (vs_prog_data->uses_baseinstance &&
           brw->draw.params.gl_baseinstance != new_baseinstance))
         brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
   }

   brw->draw.params.gl_basevertex = new_basevertex;
   brw->draw.params.gl_baseinstance = new_baseinstance;
   brw_bo_unreference(brw->draw.draw_params_bo);

   if (prim->is_indirect) {
      /* Point draw_params_bo at the indirect buffer. */
      brw->draw.draw_params_bo =
         intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
      brw_bo_reference(brw->draw.draw_params_bo);
      brw->draw.draw_params_offset =
         prim->indirect_offset + (prim->indexed ? 12 : 8);
   } else {
      /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
       * has to upload gl_BaseVertex and such if they're needed.
       */
      brw->draw.draw_params_bo = NULL;
      brw->draw.draw_params_offset = 0;
   }

   /* gl_DrawID always needs its own vertex buffer since it's not part of
    * the indirect parameter buffer.  If the program uses gl_DrawID, we need
    * to flag BRW_NEW_VERTICES.  For the first iteration, we don't have
    * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
    * the loop.
    */
   brw->draw.gl_drawid = prim->draw_id;
   brw_bo_unreference(brw->draw.draw_id_bo);
   brw->draw.draw_id_bo = NULL;
   if (prim_id > 0 && vs_prog_data->uses_drawid)
      brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   if (devinfo->gen < 6)
      brw_set_prim(brw, prim);
   else
      gen6_set_prim(brw, prim);

retry:

   /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
    * that the only state updated within the loop outside of this block is
    * that in *_set_prim or intel_batchbuffer_flush(), which only impacts
    * brw->ctx.NewDriverState.
    */
   if (brw->ctx.NewDriverState) {
      brw->batch.no_wrap = true;
      brw_upload_render_state(brw);
   }

   brw_emit_prim(brw, prim, brw->primitive, xfb_obj, stream);

   brw->batch.no_wrap = false;

   if (!brw_batch_has_aperture_space(brw, 0)) {
      if (!fail_next) {
         intel_batchbuffer_reset_to_saved(brw);
         intel_batchbuffer_flush(brw);
         fail_next = true;
         goto retry;
      } else {
         int ret = intel_batchbuffer_flush(brw);
         WARN_ONCE(ret == -ENOSPC,
                   "i965: Single primitive emit exceeded "
                   "available aperture space\n");
      }
   }
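
   /* Illustrative note (not from the original source): the retry above is a
    * save/rollback pattern.  intel_batchbuffer_save_state() snapshots the
    * batch at the top of this function; if the primitive overflows the
    * aperture, we roll back to that snapshot, flush so the next attempt
    * starts with an empty batch, and try exactly once more (fail_next
    * guards against looping forever on a primitive that can never fit).
    */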

   /* Now that we know we haven't run out of aperture space, we can safely
    * reset the dirty bits.
    */
   if (brw->ctx.NewDriverState)
      brw_render_state_finished(brw);

   return;
}

void
brw_draw_prims(struct gl_context *ctx,
               const struct _mesa_prim *prims,
               GLuint nr_prims,
               const struct _mesa_index_buffer *ib,
               GLboolean index_bounds_valid,
               GLuint min_index,
               GLuint max_index,
               struct gl_transform_feedback_object *gl_xfb_obj,
               unsigned stream,
               struct gl_buffer_object *indirect)
{
   unsigned i;
   struct brw_context *brw = brw_context(ctx);
   const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
   int predicate_state = brw->predicate.state;
   struct brw_transform_feedback_object *xfb_obj =
      (struct brw_transform_feedback_object *) gl_xfb_obj;

   if (!brw_check_conditional_render(brw))
      return;

   /* Handle primitive restart if needed. */
   if (brw_handle_primitive_restart(ctx, prims, nr_prims, ib, indirect)) {
      /* The draw was handled, so we can exit now. */
      return;
   }

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
    */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_enum_to_string(ctx->RenderMode));
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, prims, nr_prims, ib,
                      index_bounds_valid, min_index, max_index, NULL, 0, NULL);
      return;
   }

   /* If we're going to have to upload any of the user's vertex arrays, then
    * get the minimum and maximum of their index buffer so we know what range
    * to upload.
    */
   if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
      perf_debug("Scanning index buffer to compute index buffer bounds.  "
                 "Use glDrawRangeElements() to avoid this.\n");
      vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
      index_bounds_valid = true;
   }

   brw_prepare_drawing(ctx, arrays, ib, index_bounds_valid, min_index,
                       max_index);

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   for (i = 0; i < nr_prims; i++) {
      /* Implementation of ARB_indirect_parameters via predicates. */
      if (brw->draw.draw_params_count_bo) {
         brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);

         /* Upload the current draw count from the draw parameters buffer to
          * MI_PREDICATE_SRC0.
          */
         brw_load_register_mem(brw, MI_PREDICATE_SRC0,
                               brw->draw.draw_params_count_bo,
                               brw->draw.draw_params_count_offset);
         /* Zero the top 32 bits of MI_PREDICATE_SRC0. */
         brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0);
         /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
         brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id);
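
         /* Illustrative note (not from the original source): together these
          * MI_PREDICATE packets compute "draw_id < draw count".  The first
          * draw (when the predicate isn't already in use for conditional
          * rendering) uses LOADINV + SET, so the predicate starts out as
          * (draw_id != count).  Every later draw uses LOAD + XOR: while
          * draw_id < count the comparison is false and the predicate is
          * unchanged, and at draw_id == count it flips to 0 and stays 0 for
          * all remaining draws, which the PREDICATE_ENABLE bit in
          * 3DPRIMITIVE then skips.
          */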

         BEGIN_BATCH(1);
         if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) {
            OUT_BATCH(GEN7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
                      MI_PREDICATE_COMBINEOP_SET |
                      MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
         } else {
            OUT_BATCH(GEN7_MI_PREDICATE |
                      MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR |
                      MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
         }
         ADVANCE_BATCH();

         brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
      }

      brw_draw_single_prim(ctx, arrays, &prims[i], i, xfb_obj, stream,
                           indirect);
   }

   brw_finish_drawing(ctx);
   brw->predicate.state = predicate_state;
}

void
brw_draw_indirect_prims(struct gl_context *ctx,
                        GLuint mode,
                        struct gl_buffer_object *indirect_data,
                        GLsizeiptr indirect_offset,
                        unsigned draw_count,
                        unsigned stride,
                        struct gl_buffer_object *indirect_params,
                        GLsizeiptr indirect_params_offset,
                        const struct _mesa_index_buffer *ib)
{
   struct brw_context *brw = brw_context(ctx);
   struct _mesa_prim *prim;
   GLsizei i;

   prim = calloc(draw_count, sizeof(*prim));
   if (prim == NULL) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s",
                  (draw_count > 1) ? "Multi" : "",
                  ib ? "Elements" : "Arrays",
                  indirect_params ? "CountARB" : "");
      return;
   }

   prim[0].begin = 1;
   prim[draw_count - 1].end = 1;
   for (i = 0; i < draw_count; ++i, indirect_offset += stride) {
      prim[i].mode = mode;
      prim[i].indexed = ib != NULL;
      prim[i].indirect_offset = indirect_offset;
      prim[i].is_indirect = 1;
      prim[i].draw_id = i;
   }

   if (indirect_params) {
      brw->draw.draw_params_count_bo =
         intel_buffer_object(indirect_params)->buffer;
      brw_bo_reference(brw->draw.draw_params_count_bo);
      brw->draw.draw_params_count_offset = indirect_params_offset;
   }

   brw_draw_prims(ctx, prim, draw_count,
                  ib, false, 0, ~0,
                  NULL, 0,
                  indirect_data);

   free(prim);
}

void
brw_draw_init(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct vbo_context *vbo = vbo_context(ctx);

   /* Register our drawing function:
    */
   vbo->draw_prims = brw_draw_prims;
   vbo->draw_indirect_prims = brw_draw_indirect_prims;

   for (int i = 0; i < VERT_ATTRIB_MAX; i++)
      brw->vb.inputs[i].buffer = -1;
   brw->vb.nr_buffers = 0;
   brw->vb.nr_enabled = 0;
}

void
brw_draw_destroy(struct brw_context *brw)
{
   unsigned i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      brw_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < brw->vb.nr_enabled; i++) {
      brw->vb.enabled[i]->buffer = -1;
   }
   brw->vb.nr_enabled = 0;

   brw_bo_unreference(brw->ib.bo);
   brw->ib.bo = NULL;
}