1 /* 2 * Copyright 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 */ 23 24 #include <assert.h> 25 26 #include "common/gen_device_info.h" 27 #include "common/gen_sample_positions.h" 28 #include "genxml/gen_macros.h" 29 30 #include "main/bufferobj.h" 31 #include "main/context.h" 32 #include "main/enums.h" 33 #include "main/macros.h" 34 #include "main/state.h" 35 36 #include "brw_context.h" 37 #include "brw_draw.h" 38 #include "brw_multisample_state.h" 39 #include "brw_state.h" 40 #include "brw_wm.h" 41 #include "brw_util.h" 42 43 #include "intel_batchbuffer.h" 44 #include "intel_buffer_objects.h" 45 #include "intel_fbo.h" 46 47 #include "main/enums.h" 48 #include "main/fbobject.h" 49 #include "main/framebuffer.h" 50 #include "main/glformats.h" 51 #include "main/samplerobj.h" 52 #include "main/shaderapi.h" 53 #include "main/stencil.h" 54 #include "main/transformfeedback.h" 55 #include "main/varray.h" 56 #include "main/viewport.h" 57 #include "util/half_float.h" 58 59 UNUSED static void * 60 emit_dwords(struct brw_context *brw, unsigned n) 61 { 62 intel_batchbuffer_begin(brw, n, RENDER_RING); 63 uint32_t *map = brw->batch.map_next; 64 brw->batch.map_next += n; 65 intel_batchbuffer_advance(brw); 66 return map; 67 } 68 69 struct brw_address { 70 struct brw_bo *bo; 71 unsigned reloc_flags; 72 uint32_t offset; 73 }; 74 75 #define __gen_address_type struct brw_address 76 #define __gen_user_data struct brw_context 77 78 static uint64_t 79 __gen_combine_address(struct brw_context *brw, void *location, 80 struct brw_address address, uint32_t delta) 81 { 82 struct intel_batchbuffer *batch = &brw->batch; 83 uint32_t offset; 84 85 if (address.bo == NULL) { 86 return address.offset + delta; 87 } else { 88 if (GEN_GEN < 6 && brw_ptr_in_state_buffer(batch, location)) { 89 offset = (char *) location - (char *) brw->batch.state.map; 90 return brw_state_reloc(batch, offset, address.bo, 91 address.offset + delta, 92 address.reloc_flags); 93 } 94 95 assert(!brw_ptr_in_state_buffer(batch, location)); 96 97 offset = (char *) location - (char *) 
brw->batch.batch.map; 98 return brw_batch_reloc(batch, offset, address.bo, 99 address.offset + delta, 100 address.reloc_flags); 101 } 102 } 103 104 static struct brw_address 105 rw_bo(struct brw_bo *bo, uint32_t offset) 106 { 107 return (struct brw_address) { 108 .bo = bo, 109 .offset = offset, 110 .reloc_flags = RELOC_WRITE, 111 }; 112 } 113 114 static struct brw_address 115 ro_bo(struct brw_bo *bo, uint32_t offset) 116 { 117 return (struct brw_address) { 118 .bo = bo, 119 .offset = offset, 120 }; 121 } 122 123 UNUSED static struct brw_address 124 ggtt_bo(struct brw_bo *bo, uint32_t offset) 125 { 126 return (struct brw_address) { 127 .bo = bo, 128 .offset = offset, 129 .reloc_flags = RELOC_WRITE | RELOC_NEEDS_GGTT, 130 }; 131 } 132 133 #if GEN_GEN == 4 134 static struct brw_address 135 KSP(struct brw_context *brw, uint32_t offset) 136 { 137 return ro_bo(brw->cache.bo, offset); 138 } 139 #else 140 static uint32_t 141 KSP(struct brw_context *brw, uint32_t offset) 142 { 143 return offset; 144 } 145 #endif 146 147 #include "genxml/genX_pack.h" 148 149 #define _brw_cmd_length(cmd) cmd ## _length 150 #define _brw_cmd_length_bias(cmd) cmd ## _length_bias 151 #define _brw_cmd_header(cmd) cmd ## _header 152 #define _brw_cmd_pack(cmd) cmd ## _pack 153 154 #define brw_batch_emit(brw, cmd, name) \ 155 for (struct cmd name = { _brw_cmd_header(cmd) }, \ 156 *_dst = emit_dwords(brw, _brw_cmd_length(cmd)); \ 157 __builtin_expect(_dst != NULL, 1); \ 158 _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ 159 _dst = NULL) 160 161 #define brw_batch_emitn(brw, cmd, n, ...) 
({ \ 162 uint32_t *_dw = emit_dwords(brw, n); \ 163 struct cmd template = { \ 164 _brw_cmd_header(cmd), \ 165 .DWordLength = n - _brw_cmd_length_bias(cmd), \ 166 __VA_ARGS__ \ 167 }; \ 168 _brw_cmd_pack(cmd)(brw, _dw, &template); \ 169 _dw + 1; /* Array starts at dw[1] */ \ 170 }) 171 172 #define brw_state_emit(brw, cmd, align, offset, name) \ 173 for (struct cmd name = {}, \ 174 *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4, \ 175 align, offset); \ 176 __builtin_expect(_dst != NULL, 1); \ 177 _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ 178 _dst = NULL) 179 180 /** 181 * Polygon stipple packet 182 */ 183 static void 184 genX(upload_polygon_stipple)(struct brw_context *brw) 185 { 186 struct gl_context *ctx = &brw->ctx; 187 188 /* _NEW_POLYGON */ 189 if (!ctx->Polygon.StippleFlag) 190 return; 191 192 brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) { 193 /* Polygon stipple is provided in OpenGL order, i.e. bottom 194 * row first. If we're rendering to a window (i.e. the 195 * default frame buffer object, 0), then we need to invert 196 * it to match our pixel layout. But if we're rendering 197 * to a FBO (i.e. any named frame buffer object), we *don't* 198 * need to invert - we already match the layout. 
199 */ 200 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { 201 for (unsigned i = 0; i < 32; i++) 202 poly.PatternRow[i] = ctx->PolygonStipple[31 - i]; /* invert */ 203 } else { 204 for (unsigned i = 0; i < 32; i++) 205 poly.PatternRow[i] = ctx->PolygonStipple[i]; 206 } 207 } 208 } 209 210 static const struct brw_tracked_state genX(polygon_stipple) = { 211 .dirty = { 212 .mesa = _NEW_POLYGON | 213 _NEW_POLYGONSTIPPLE, 214 .brw = BRW_NEW_CONTEXT, 215 }, 216 .emit = genX(upload_polygon_stipple), 217 }; 218 219 /** 220 * Polygon stipple offset packet 221 */ 222 static void 223 genX(upload_polygon_stipple_offset)(struct brw_context *brw) 224 { 225 struct gl_context *ctx = &brw->ctx; 226 227 /* _NEW_POLYGON */ 228 if (!ctx->Polygon.StippleFlag) 229 return; 230 231 brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_OFFSET), poly) { 232 /* _NEW_BUFFERS 233 * 234 * If we're drawing to a system window we have to invert the Y axis 235 * in order to match the OpenGL pixel coordinate system, and our 236 * offset must be matched to the window position. If we're drawing 237 * to a user-created FBO then our native pixel coordinate system 238 * works just fine, and there's no window system to worry about. 
239 */ 240 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { 241 poly.PolygonStippleYOffset = 242 (32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31; 243 } 244 } 245 } 246 247 static const struct brw_tracked_state genX(polygon_stipple_offset) = { 248 .dirty = { 249 .mesa = _NEW_BUFFERS | 250 _NEW_POLYGON, 251 .brw = BRW_NEW_CONTEXT, 252 }, 253 .emit = genX(upload_polygon_stipple_offset), 254 }; 255 256 /** 257 * Line stipple packet 258 */ 259 static void 260 genX(upload_line_stipple)(struct brw_context *brw) 261 { 262 struct gl_context *ctx = &brw->ctx; 263 264 if (!ctx->Line.StippleFlag) 265 return; 266 267 brw_batch_emit(brw, GENX(3DSTATE_LINE_STIPPLE), line) { 268 line.LineStipplePattern = ctx->Line.StipplePattern; 269 270 line.LineStippleInverseRepeatCount = 1.0f / ctx->Line.StippleFactor; 271 line.LineStippleRepeatCount = ctx->Line.StippleFactor; 272 } 273 } 274 275 static const struct brw_tracked_state genX(line_stipple) = { 276 .dirty = { 277 .mesa = _NEW_LINE, 278 .brw = BRW_NEW_CONTEXT, 279 }, 280 .emit = genX(upload_line_stipple), 281 }; 282 283 /* Constant single cliprect for framebuffer object or DRI2 drawing */ 284 static void 285 genX(upload_drawing_rect)(struct brw_context *brw) 286 { 287 struct gl_context *ctx = &brw->ctx; 288 const struct gl_framebuffer *fb = ctx->DrawBuffer; 289 const unsigned int fb_width = _mesa_geometric_width(fb); 290 const unsigned int fb_height = _mesa_geometric_height(fb); 291 292 brw_batch_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { 293 rect.ClippedDrawingRectangleXMax = fb_width - 1; 294 rect.ClippedDrawingRectangleYMax = fb_height - 1; 295 } 296 } 297 298 static const struct brw_tracked_state genX(drawing_rect) = { 299 .dirty = { 300 .mesa = _NEW_BUFFERS, 301 .brw = BRW_NEW_BLORP | 302 BRW_NEW_CONTEXT, 303 }, 304 .emit = genX(upload_drawing_rect), 305 }; 306 307 static uint32_t * 308 genX(emit_vertex_buffer_state)(struct brw_context *brw, 309 uint32_t *dw, 310 unsigned buffer_nr, 311 struct brw_bo *bo, 312 
unsigned start_offset, 313 unsigned end_offset, 314 unsigned stride, 315 unsigned step_rate) 316 { 317 struct GENX(VERTEX_BUFFER_STATE) buf_state = { 318 .VertexBufferIndex = buffer_nr, 319 .BufferPitch = stride, 320 .BufferStartingAddress = ro_bo(bo, start_offset), 321 #if GEN_GEN >= 8 322 .BufferSize = end_offset - start_offset, 323 #endif 324 325 #if GEN_GEN >= 7 326 .AddressModifyEnable = true, 327 #endif 328 329 #if GEN_GEN < 8 330 .BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA, 331 .InstanceDataStepRate = step_rate, 332 #if GEN_GEN >= 5 333 .EndAddress = ro_bo(bo, end_offset - 1), 334 #endif 335 #endif 336 337 #if GEN_GEN == 10 338 .VertexBufferMOCS = CNL_MOCS_WB, 339 #elif GEN_GEN == 9 340 .VertexBufferMOCS = SKL_MOCS_WB, 341 #elif GEN_GEN == 8 342 .VertexBufferMOCS = BDW_MOCS_WB, 343 #elif GEN_GEN == 7 344 .VertexBufferMOCS = GEN7_MOCS_L3, 345 #endif 346 }; 347 348 GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &buf_state); 349 return dw + GENX(VERTEX_BUFFER_STATE_length); 350 } 351 352 UNUSED static bool 353 is_passthru_format(uint32_t format) 354 { 355 switch (format) { 356 case ISL_FORMAT_R64_PASSTHRU: 357 case ISL_FORMAT_R64G64_PASSTHRU: 358 case ISL_FORMAT_R64G64B64_PASSTHRU: 359 case ISL_FORMAT_R64G64B64A64_PASSTHRU: 360 return true; 361 default: 362 return false; 363 } 364 } 365 366 UNUSED static int 367 uploads_needed(uint32_t format, 368 bool is_dual_slot) 369 { 370 if (!is_passthru_format(format)) 371 return 1; 372 373 if (is_dual_slot) 374 return 2; 375 376 switch (format) { 377 case ISL_FORMAT_R64_PASSTHRU: 378 case ISL_FORMAT_R64G64_PASSTHRU: 379 return 1; 380 case ISL_FORMAT_R64G64B64_PASSTHRU: 381 case ISL_FORMAT_R64G64B64A64_PASSTHRU: 382 return 2; 383 default: 384 unreachable("not reached"); 385 } 386 } 387 388 /* 389 * Returns the format that we are finally going to use when upload a vertex 390 * element. It will only change if we are using *64*PASSTHRU formats, as for 391 * gen < 8 they need to be splitted on two *32*FLOAT formats. 
392 * 393 * @upload points in which upload we are. Valid values are [0,1] 394 */ 395 static uint32_t 396 downsize_format_if_needed(uint32_t format, 397 int upload) 398 { 399 assert(upload == 0 || upload == 1); 400 401 if (!is_passthru_format(format)) 402 return format; 403 404 /* ISL_FORMAT_R64_PASSTHRU and ISL_FORMAT_R64G64_PASSTHRU with an upload == 405 * 1 means that we have been forced to do 2 uploads for a size <= 2. This 406 * happens with gen < 8 and dvec3 or dvec4 vertex shader input 407 * variables. In those cases, we return ISL_FORMAT_R32_FLOAT as a way of 408 * flagging that we want to fill with zeroes this second forced upload. 409 */ 410 switch (format) { 411 case ISL_FORMAT_R64_PASSTHRU: 412 return !upload ? ISL_FORMAT_R32G32_FLOAT 413 : ISL_FORMAT_R32_FLOAT; 414 case ISL_FORMAT_R64G64_PASSTHRU: 415 return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT 416 : ISL_FORMAT_R32_FLOAT; 417 case ISL_FORMAT_R64G64B64_PASSTHRU: 418 return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT 419 : ISL_FORMAT_R32G32_FLOAT; 420 case ISL_FORMAT_R64G64B64A64_PASSTHRU: 421 return ISL_FORMAT_R32G32B32A32_FLOAT; 422 default: 423 unreachable("not reached"); 424 } 425 } 426 427 /* 428 * Returns the number of componentes associated with a format that is used on 429 * a 64 to 32 format split. See downsize_format() 430 */ 431 static int 432 upload_format_size(uint32_t upload_format) 433 { 434 switch (upload_format) { 435 case ISL_FORMAT_R32_FLOAT: 436 437 /* downsized_format has returned this one in order to flag that we are 438 * performing a second upload which we want to have filled with 439 * zeroes. This happens with gen < 8, a size <= 2, and dvec3 or dvec4 440 * vertex shader input variables. 
441 */ 442 443 return 0; 444 case ISL_FORMAT_R32G32_FLOAT: 445 return 2; 446 case ISL_FORMAT_R32G32B32A32_FLOAT: 447 return 4; 448 default: 449 unreachable("not reached"); 450 } 451 } 452 453 static void 454 genX(emit_vertices)(struct brw_context *brw) 455 { 456 const struct gen_device_info *devinfo = &brw->screen->devinfo; 457 uint32_t *dw; 458 459 brw_prepare_vertices(brw); 460 brw_prepare_shader_draw_parameters(brw); 461 462 #if GEN_GEN < 6 463 brw_emit_query_begin(brw); 464 #endif 465 466 const struct brw_vs_prog_data *vs_prog_data = 467 brw_vs_prog_data(brw->vs.base.prog_data); 468 469 #if GEN_GEN >= 8 470 struct gl_context *ctx = &brw->ctx; 471 const bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || 472 ctx->Polygon.BackMode != GL_FILL); 473 474 if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { 475 unsigned vue = brw->vb.nr_enabled; 476 477 /* The element for the edge flags must always be last, so we have to 478 * insert the SGVS before it in that case. 
479 */ 480 if (uses_edge_flag) { 481 assert(vue > 0); 482 vue--; 483 } 484 485 WARN_ONCE(vue >= 33, 486 "Trying to insert VID/IID past 33rd vertex element, " 487 "need to reorder the vertex attrbutes."); 488 489 brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) { 490 if (vs_prog_data->uses_vertexid) { 491 vfs.VertexIDEnable = true; 492 vfs.VertexIDComponentNumber = 2; 493 vfs.VertexIDElementOffset = vue; 494 } 495 496 if (vs_prog_data->uses_instanceid) { 497 vfs.InstanceIDEnable = true; 498 vfs.InstanceIDComponentNumber = 3; 499 vfs.InstanceIDElementOffset = vue; 500 } 501 } 502 503 brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { 504 vfi.InstancingEnable = true; 505 vfi.VertexElementIndex = vue; 506 } 507 } else { 508 brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs); 509 } 510 511 /* Normally we don't need an element for the SGVS attribute because the 512 * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an 513 * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if 514 * we're using draw parameters then we need an element for the those 515 * values. Additionally if there is an edge flag element then the SGVS 516 * can't be inserted past that so we need a dummy element to ensure that 517 * the edge flag is the last one. 
518 */ 519 const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || 520 vs_prog_data->uses_baseinstance || 521 ((vs_prog_data->uses_instanceid || 522 vs_prog_data->uses_vertexid) 523 && uses_edge_flag)); 524 #else 525 const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || 526 vs_prog_data->uses_baseinstance || 527 vs_prog_data->uses_instanceid || 528 vs_prog_data->uses_vertexid); 529 #endif 530 unsigned nr_elements = 531 brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; 532 533 #if GEN_GEN < 8 534 /* If any of the formats of vb.enabled needs more that one upload, we need 535 * to add it to nr_elements 536 */ 537 for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { 538 struct brw_vertex_element *input = brw->vb.enabled[i]; 539 uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); 540 541 if (uploads_needed(format, input->is_dual_slot) > 1) 542 nr_elements++; 543 } 544 #endif 545 546 /* If the VS doesn't read any inputs (calculating vertex position from 547 * a state variable for some reason, for example), emit a single pad 548 * VERTEX_ELEMENT struct and bail. 549 * 550 * The stale VB state stays in place, but they don't do anything unless 551 * a VE loads from them. 552 */ 553 if (nr_elements == 0) { 554 dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), 555 1 + GENX(VERTEX_ELEMENT_STATE_length)); 556 struct GENX(VERTEX_ELEMENT_STATE) elem = { 557 .Valid = true, 558 .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32B32A32_FLOAT, 559 .Component0Control = VFCOMP_STORE_0, 560 .Component1Control = VFCOMP_STORE_0, 561 .Component2Control = VFCOMP_STORE_0, 562 .Component3Control = VFCOMP_STORE_1_FP, 563 }; 564 GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem); 565 return; 566 } 567 568 /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. 
*/ 569 const bool uses_draw_params = 570 vs_prog_data->uses_basevertex || 571 vs_prog_data->uses_baseinstance; 572 const unsigned nr_buffers = brw->vb.nr_buffers + 573 uses_draw_params + vs_prog_data->uses_drawid; 574 575 if (nr_buffers) { 576 assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17)); 577 578 dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS), 579 1 + GENX(VERTEX_BUFFER_STATE_length) * nr_buffers); 580 581 for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { 582 const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; 583 /* Prior to Haswell and Bay Trail we have to use 4-component formats 584 * to fake 3-component ones. In particular, we do this for 585 * half-float and 8 and 16-bit integer formats. This means that the 586 * vertex element may poke over the end of the buffer by 2 bytes. 587 */ 588 const unsigned padding = 589 (GEN_GEN <= 7 && !GEN_IS_HASWELL && !devinfo->is_baytrail) * 2; 590 const unsigned end = buffer->offset + buffer->size + padding; 591 dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo, 592 buffer->offset, 593 end, 594 buffer->stride, 595 buffer->step_rate); 596 } 597 598 if (uses_draw_params) { 599 dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers, 600 brw->draw.draw_params_bo, 601 brw->draw.draw_params_offset, 602 brw->draw.draw_params_bo->size, 603 0 /* stride */, 604 0 /* step rate */); 605 } 606 607 if (vs_prog_data->uses_drawid) { 608 dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1, 609 brw->draw.draw_id_bo, 610 brw->draw.draw_id_offset, 611 brw->draw.draw_id_bo->size, 612 0 /* stride */, 613 0 /* step rate */); 614 } 615 } 616 617 /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, 618 * presumably for VertexID/InstanceID. 
619 */ 620 #if GEN_GEN >= 6 621 assert(nr_elements <= 34); 622 const struct brw_vertex_element *gen6_edgeflag_input = NULL; 623 #else 624 assert(nr_elements <= 18); 625 #endif 626 627 dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), 628 1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements); 629 unsigned i; 630 for (i = 0; i < brw->vb.nr_enabled; i++) { 631 const struct brw_vertex_element *input = brw->vb.enabled[i]; 632 uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); 633 uint32_t comp0 = VFCOMP_STORE_SRC; 634 uint32_t comp1 = VFCOMP_STORE_SRC; 635 uint32_t comp2 = VFCOMP_STORE_SRC; 636 uint32_t comp3 = VFCOMP_STORE_SRC; 637 const unsigned num_uploads = GEN_GEN < 8 ? 638 uploads_needed(format, input->is_dual_slot) : 1; 639 640 #if GEN_GEN >= 8 641 /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): 642 * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an 643 * element which has edge flag enabled." 644 */ 645 assert(!(is_passthru_format(format) && uses_edge_flag)); 646 #endif 647 648 /* The gen4 driver expects edgeflag to come in as a float, and passes 649 * that float on to the tests in the clipper. Mesa's current vertex 650 * attribute value for EdgeFlag is stored as a float, which works out. 651 * glEdgeFlagPointer, on the other hand, gives us an unnormalized 652 * integer ubyte. Just rewrite that to convert to a float. 653 * 654 * Gen6+ passes edgeflag as sideband along with the vertex, instead 655 * of in the VUE. We have to upload it sideband as the last vertex 656 * element according to the B-Spec. 657 */ 658 #if GEN_GEN >= 6 659 if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { 660 gen6_edgeflag_input = input; 661 continue; 662 } 663 #endif 664 665 for (unsigned c = 0; c < num_uploads; c++) { 666 const uint32_t upload_format = GEN_GEN >= 8 ? 
format : 667 downsize_format_if_needed(format, c); 668 /* If we need more that one upload, the offset stride would be 128 669 * bits (16 bytes), as for previous uploads we are using the full 670 * entry. */ 671 const unsigned offset = input->offset + c * 16; 672 673 const int size = (GEN_GEN < 8 && is_passthru_format(format)) ? 674 upload_format_size(upload_format) : input->glarray->Size; 675 676 switch (size) { 677 case 0: comp0 = VFCOMP_STORE_0; 678 case 1: comp1 = VFCOMP_STORE_0; 679 case 2: comp2 = VFCOMP_STORE_0; 680 case 3: 681 if (GEN_GEN >= 8 && input->glarray->Doubles) { 682 comp3 = VFCOMP_STORE_0; 683 } else if (input->glarray->Integer) { 684 comp3 = VFCOMP_STORE_1_INT; 685 } else { 686 comp3 = VFCOMP_STORE_1_FP; 687 } 688 689 break; 690 } 691 692 #if GEN_GEN >= 8 693 /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): 694 * 695 * "When SourceElementFormat is set to one of the *64*_PASSTHRU 696 * formats, 64-bit components are stored in the URB without any 697 * conversion. In this case, vertex elements must be written as 128 698 * or 256 bits, with VFCOMP_STORE_0 being used to pad the output as 699 * required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red 700 * component into the URB, Component 1 must be specified as 701 * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) in 702 * order to output a 128-bit vertex element, or Components 1-3 must 703 * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex 704 * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3 705 * to be specified as VFCOMP_STORE_0 in order to output a 256-bit 706 * vertex element." 707 */ 708 if (input->glarray->Doubles && !input->is_dual_slot) { 709 /* Store vertex elements which correspond to double and dvec2 vertex 710 * shader inputs as 128-bit vertex elements, instead of 256-bits. 
711 */ 712 comp2 = VFCOMP_NOSTORE; 713 comp3 = VFCOMP_NOSTORE; 714 } 715 #endif 716 717 struct GENX(VERTEX_ELEMENT_STATE) elem_state = { 718 .VertexBufferIndex = input->buffer, 719 .Valid = true, 720 .SourceElementFormat = upload_format, 721 .SourceElementOffset = offset, 722 .Component0Control = comp0, 723 .Component1Control = comp1, 724 .Component2Control = comp2, 725 .Component3Control = comp3, 726 #if GEN_GEN < 5 727 .DestinationElementOffset = i * 4, 728 #endif 729 }; 730 731 GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); 732 dw += GENX(VERTEX_ELEMENT_STATE_length); 733 } 734 } 735 736 if (needs_sgvs_element) { 737 struct GENX(VERTEX_ELEMENT_STATE) elem_state = { 738 .Valid = true, 739 .Component0Control = VFCOMP_STORE_0, 740 .Component1Control = VFCOMP_STORE_0, 741 .Component2Control = VFCOMP_STORE_0, 742 .Component3Control = VFCOMP_STORE_0, 743 #if GEN_GEN < 5 744 .DestinationElementOffset = i * 4, 745 #endif 746 }; 747 748 #if GEN_GEN >= 8 749 if (vs_prog_data->uses_basevertex || 750 vs_prog_data->uses_baseinstance) { 751 elem_state.VertexBufferIndex = brw->vb.nr_buffers; 752 elem_state.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT; 753 elem_state.Component0Control = VFCOMP_STORE_SRC; 754 elem_state.Component1Control = VFCOMP_STORE_SRC; 755 } 756 #else 757 elem_state.VertexBufferIndex = brw->vb.nr_buffers; 758 elem_state.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT; 759 if (vs_prog_data->uses_basevertex) 760 elem_state.Component0Control = VFCOMP_STORE_SRC; 761 762 if (vs_prog_data->uses_baseinstance) 763 elem_state.Component1Control = VFCOMP_STORE_SRC; 764 765 if (vs_prog_data->uses_vertexid) 766 elem_state.Component2Control = VFCOMP_STORE_VID; 767 768 if (vs_prog_data->uses_instanceid) 769 elem_state.Component3Control = VFCOMP_STORE_IID; 770 #endif 771 772 GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); 773 dw += GENX(VERTEX_ELEMENT_STATE_length); 774 } 775 776 if 
(vs_prog_data->uses_drawid) { 777 struct GENX(VERTEX_ELEMENT_STATE) elem_state = { 778 .Valid = true, 779 .VertexBufferIndex = brw->vb.nr_buffers + 1, 780 .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32_UINT, 781 .Component0Control = VFCOMP_STORE_SRC, 782 .Component1Control = VFCOMP_STORE_0, 783 .Component2Control = VFCOMP_STORE_0, 784 .Component3Control = VFCOMP_STORE_0, 785 #if GEN_GEN < 5 786 .DestinationElementOffset = i * 4, 787 #endif 788 }; 789 790 GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); 791 dw += GENX(VERTEX_ELEMENT_STATE_length); 792 } 793 794 #if GEN_GEN >= 6 795 if (gen6_edgeflag_input) { 796 const uint32_t format = 797 brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); 798 799 struct GENX(VERTEX_ELEMENT_STATE) elem_state = { 800 .Valid = true, 801 .VertexBufferIndex = gen6_edgeflag_input->buffer, 802 .EdgeFlagEnable = true, 803 .SourceElementFormat = format, 804 .SourceElementOffset = gen6_edgeflag_input->offset, 805 .Component0Control = VFCOMP_STORE_SRC, 806 .Component1Control = VFCOMP_STORE_0, 807 .Component2Control = VFCOMP_STORE_0, 808 .Component3Control = VFCOMP_STORE_0, 809 }; 810 811 GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); 812 dw += GENX(VERTEX_ELEMENT_STATE_length); 813 } 814 #endif 815 816 #if GEN_GEN >= 8 817 for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { 818 const struct brw_vertex_element *input = brw->vb.enabled[i]; 819 const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; 820 unsigned element_index; 821 822 /* The edge flag element is reordered to be the last one in the code 823 * above so we need to compensate for that in the element indices used 824 * below. 
825 */ 826 if (input == gen6_edgeflag_input) 827 element_index = nr_elements - 1; 828 else 829 element_index = j++; 830 831 brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { 832 vfi.VertexElementIndex = element_index; 833 vfi.InstancingEnable = buffer->step_rate != 0; 834 vfi.InstanceDataStepRate = buffer->step_rate; 835 } 836 } 837 838 if (vs_prog_data->uses_drawid) { 839 const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; 840 841 brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { 842 vfi.VertexElementIndex = element; 843 } 844 } 845 #endif 846 } 847 848 static const struct brw_tracked_state genX(vertices) = { 849 .dirty = { 850 .mesa = _NEW_POLYGON, 851 .brw = BRW_NEW_BATCH | 852 BRW_NEW_BLORP | 853 BRW_NEW_VERTICES | 854 BRW_NEW_VS_PROG_DATA, 855 }, 856 .emit = genX(emit_vertices), 857 }; 858 859 static void 860 genX(emit_index_buffer)(struct brw_context *brw) 861 { 862 const struct _mesa_index_buffer *index_buffer = brw->ib.ib; 863 864 if (index_buffer == NULL) 865 return; 866 867 brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) { 868 #if GEN_GEN < 8 && !GEN_IS_HASWELL 869 ib.CutIndexEnable = brw->prim_restart.enable_cut_index; 870 #endif 871 ib.IndexFormat = brw_get_index_type(index_buffer->index_size); 872 ib.BufferStartingAddress = ro_bo(brw->ib.bo, 0); 873 #if GEN_GEN >= 8 874 ib.IndexBufferMOCS = GEN_GEN >= 9 ? 
SKL_MOCS_WB : BDW_MOCS_WB; 875 ib.BufferSize = brw->ib.size; 876 #else 877 ib.BufferEndingAddress = ro_bo(brw->ib.bo, brw->ib.size - 1); 878 #endif 879 } 880 } 881 882 static const struct brw_tracked_state genX(index_buffer) = { 883 .dirty = { 884 .mesa = 0, 885 .brw = BRW_NEW_BATCH | 886 BRW_NEW_BLORP | 887 BRW_NEW_INDEX_BUFFER, 888 }, 889 .emit = genX(emit_index_buffer), 890 }; 891 892 #if GEN_IS_HASWELL || GEN_GEN >= 8 893 static void 894 genX(upload_cut_index)(struct brw_context *brw) 895 { 896 const struct gl_context *ctx = &brw->ctx; 897 898 brw_batch_emit(brw, GENX(3DSTATE_VF), vf) { 899 if (ctx->Array._PrimitiveRestart && brw->ib.ib) { 900 vf.IndexedDrawCutIndexEnable = true; 901 vf.CutIndex = _mesa_primitive_restart_index(ctx, brw->ib.index_size); 902 } 903 } 904 } 905 906 const struct brw_tracked_state genX(cut_index) = { 907 .dirty = { 908 .mesa = _NEW_TRANSFORM, 909 .brw = BRW_NEW_INDEX_BUFFER, 910 }, 911 .emit = genX(upload_cut_index), 912 }; 913 #endif 914 915 #if GEN_GEN >= 6 916 /** 917 * Determine the appropriate attribute override value to store into the 918 * 3DSTATE_SF structure for a given fragment shader attribute. The attribute 919 * override value contains two pieces of information: the location of the 920 * attribute in the VUE (relative to urb_entry_read_offset, see below), and a 921 * flag indicating whether to "swizzle" the attribute based on the direction 922 * the triangle is facing. 923 * 924 * If an attribute is "swizzled", then the given VUE location is used for 925 * front-facing triangles, and the VUE location that immediately follows is 926 * used for back-facing triangles. We use this to implement the mapping from 927 * gl_FrontColor/gl_BackColor to gl_Color. 928 * 929 * urb_entry_read_offset is the offset into the VUE at which the SF unit is 930 * being instructed to begin reading attribute data. 
It can be set to a 931 * nonzero value to prevent the SF unit from wasting time reading elements of 932 * the VUE that are not needed by the fragment shader. It is measured in 933 * 256-bit increments. 934 */ 935 static void 936 genX(get_attr_override)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr, 937 const struct brw_vue_map *vue_map, 938 int urb_entry_read_offset, int fs_attr, 939 bool two_side_color, uint32_t *max_source_attr) 940 { 941 /* Find the VUE slot for this attribute. */ 942 int slot = vue_map->varying_to_slot[fs_attr]; 943 944 /* Viewport and Layer are stored in the VUE header. We need to override 945 * them to zero if earlier stages didn't write them, as GL requires that 946 * they read back as zero when not explicitly set. 947 */ 948 if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) { 949 attr->ComponentOverrideX = true; 950 attr->ComponentOverrideW = true; 951 attr->ConstantSource = CONST_0000; 952 953 if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) 954 attr->ComponentOverrideY = true; 955 if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) 956 attr->ComponentOverrideZ = true; 957 958 return; 959 } 960 961 /* If there was only a back color written but not front, use back 962 * as the color instead of undefined 963 */ 964 if (slot == -1 && fs_attr == VARYING_SLOT_COL0) 965 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0]; 966 if (slot == -1 && fs_attr == VARYING_SLOT_COL1) 967 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1]; 968 969 if (slot == -1) { 970 /* This attribute does not exist in the VUE--that means that the vertex 971 * shader did not write to it. This means that either: 972 * 973 * (a) This attribute is a texture coordinate, and it is going to be 974 * replaced with point coordinates (as a consequence of a call to 975 * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the 976 * hardware will ignore whatever attribute override we supply. 
977 * 978 * (b) This attribute is read by the fragment shader but not written by 979 * the vertex shader, so its value is undefined. Therefore the 980 * attribute override we supply doesn't matter. 981 * 982 * (c) This attribute is gl_PrimitiveID, and it wasn't written by the 983 * previous shader stage. 984 * 985 * Note that we don't have to worry about the cases where the attribute 986 * is gl_PointCoord or is undergoing point sprite coordinate 987 * replacement, because in those cases, this function isn't called. 988 * 989 * In case (c), we need to program the attribute overrides so that the 990 * primitive ID will be stored in this slot. In every other case, the 991 * attribute override we supply doesn't matter. So just go ahead and 992 * program primitive ID in every case. 993 */ 994 attr->ComponentOverrideW = true; 995 attr->ComponentOverrideX = true; 996 attr->ComponentOverrideY = true; 997 attr->ComponentOverrideZ = true; 998 attr->ConstantSource = PRIM_ID; 999 return; 1000 } 1001 1002 /* Compute the location of the attribute relative to urb_entry_read_offset. 1003 * Each increment of urb_entry_read_offset represents a 256-bit value, so 1004 * it counts for two 128-bit VUE slots. 1005 */ 1006 int source_attr = slot - 2 * urb_entry_read_offset; 1007 assert(source_attr >= 0 && source_attr < 32); 1008 1009 /* If we are doing two-sided color, and the VUE slot following this one 1010 * represents a back-facing color, then we need to instruct the SF unit to 1011 * do back-facing swizzling. 1012 */ 1013 bool swizzling = two_side_color && 1014 ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 && 1015 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) || 1016 (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 && 1017 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1)); 1018 1019 /* Update max_source_attr. If swizzling, the SF will read this slot + 1. 
*/ 1020 if (*max_source_attr < source_attr + swizzling) 1021 *max_source_attr = source_attr + swizzling; 1022 1023 attr->SourceAttribute = source_attr; 1024 if (swizzling) 1025 attr->SwizzleSelect = INPUTATTR_FACING; 1026 } 1027 1028 1029 static void 1030 genX(calculate_attr_overrides)(const struct brw_context *brw, 1031 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides, 1032 uint32_t *point_sprite_enables, 1033 uint32_t *urb_entry_read_length, 1034 uint32_t *urb_entry_read_offset) 1035 { 1036 const struct gl_context *ctx = &brw->ctx; 1037 1038 /* _NEW_POINT */ 1039 const struct gl_point_attrib *point = &ctx->Point; 1040 1041 /* BRW_NEW_FRAGMENT_PROGRAM */ 1042 const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; 1043 1044 /* BRW_NEW_FS_PROG_DATA */ 1045 const struct brw_wm_prog_data *wm_prog_data = 1046 brw_wm_prog_data(brw->wm.base.prog_data); 1047 uint32_t max_source_attr = 0; 1048 1049 *point_sprite_enables = 0; 1050 1051 int first_slot = 1052 brw_compute_first_urb_slot_required(fp->info.inputs_read, 1053 &brw->vue_map_geom_out); 1054 1055 /* Each URB offset packs two varying slots */ 1056 assert(first_slot % 2 == 0); 1057 *urb_entry_read_offset = first_slot / 2; 1058 1059 /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE, 1060 * description of dw10 Point Sprite Texture Coordinate Enable: 1061 * 1062 * "This field must be programmed to zero when non-point primitives 1063 * are rendered." 1064 * 1065 * The SandyBridge PRM doesn't explicitly say that point sprite enables 1066 * must be programmed to zero when rendering non-point primitives, but 1067 * the IvyBridge PRM does, and if we don't, we get garbage. 1068 * 1069 * This is not required on Haswell, as the hardware ignores this state 1070 * when drawing non-points -- although we do still need to be careful to 1071 * correctly set the attr overrides. 
1072 * 1073 * _NEW_POLYGON 1074 * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA 1075 */ 1076 bool drawing_points = brw_is_drawing_points(brw); 1077 1078 for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { 1079 int input_index = wm_prog_data->urb_setup[attr]; 1080 1081 if (input_index < 0) 1082 continue; 1083 1084 /* _NEW_POINT */ 1085 bool point_sprite = false; 1086 if (drawing_points) { 1087 if (point->PointSprite && 1088 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) && 1089 (point->CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) { 1090 point_sprite = true; 1091 } 1092 1093 if (attr == VARYING_SLOT_PNTC) 1094 point_sprite = true; 1095 1096 if (point_sprite) 1097 *point_sprite_enables |= (1 << input_index); 1098 } 1099 1100 /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */ 1101 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 }; 1102 1103 if (!point_sprite) { 1104 genX(get_attr_override)(&attribute, 1105 &brw->vue_map_geom_out, 1106 *urb_entry_read_offset, attr, 1107 _mesa_vertex_program_two_side_enabled(ctx), 1108 &max_source_attr); 1109 } 1110 1111 /* The hardware can only do the overrides on 16 overrides at a 1112 * time, and the other up to 16 have to be lined up so that the 1113 * input index = the output index. We'll need to do some 1114 * tweaking to make sure that's the case. 1115 */ 1116 if (input_index < 16) 1117 attr_overrides[input_index] = attribute; 1118 else 1119 assert(attribute.SourceAttribute == input_index); 1120 } 1121 1122 /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for 1123 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": 1124 * 1125 * "This field should be set to the minimum length required to read the 1126 * maximum source attribute. The maximum source attribute is indicated 1127 * by the maximum value of the enabled Attribute # Source Attribute if 1128 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if 1129 * enable is not set. 
1130 * read_length = ceiling((max_source_attr + 1) / 2) 1131 * 1132 * [errata] Corruption/Hang possible if length programmed larger than 1133 * recommended" 1134 * 1135 * Similar text exists for Ivy Bridge. 1136 */ 1137 *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2); 1138 } 1139 #endif 1140 1141 /* ---------------------------------------------------------------------- */ 1142 1143 #if GEN_GEN >= 8 1144 typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML; 1145 #elif GEN_GEN >= 6 1146 typedef struct GENX(DEPTH_STENCIL_STATE) DEPTH_STENCIL_GENXML; 1147 #else 1148 typedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML; 1149 #endif 1150 1151 static inline void 1152 set_depth_stencil_bits(struct brw_context *brw, DEPTH_STENCIL_GENXML *ds) 1153 { 1154 struct gl_context *ctx = &brw->ctx; 1155 1156 /* _NEW_BUFFERS */ 1157 struct intel_renderbuffer *depth_irb = 1158 intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); 1159 1160 /* _NEW_DEPTH */ 1161 struct gl_depthbuffer_attrib *depth = &ctx->Depth; 1162 1163 /* _NEW_STENCIL */ 1164 struct gl_stencil_attrib *stencil = &ctx->Stencil; 1165 const int b = stencil->_BackFace; 1166 1167 if (depth->Test && depth_irb) { 1168 ds->DepthTestEnable = true; 1169 ds->DepthBufferWriteEnable = brw_depth_writes_enabled(brw); 1170 ds->DepthTestFunction = intel_translate_compare_func(depth->Func); 1171 } 1172 1173 if (brw->stencil_enabled) { 1174 ds->StencilTestEnable = true; 1175 ds->StencilWriteMask = stencil->WriteMask[0] & 0xff; 1176 ds->StencilTestMask = stencil->ValueMask[0] & 0xff; 1177 1178 ds->StencilTestFunction = 1179 intel_translate_compare_func(stencil->Function[0]); 1180 ds->StencilFailOp = 1181 intel_translate_stencil_op(stencil->FailFunc[0]); 1182 ds->StencilPassDepthPassOp = 1183 intel_translate_stencil_op(stencil->ZPassFunc[0]); 1184 ds->StencilPassDepthFailOp = 1185 intel_translate_stencil_op(stencil->ZFailFunc[0]); 1186 1187 ds->StencilBufferWriteEnable = brw->stencil_write_enabled; 1188 
1189 if (brw->stencil_two_sided) { 1190 ds->DoubleSidedStencilEnable = true; 1191 ds->BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff; 1192 ds->BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff; 1193 1194 ds->BackfaceStencilTestFunction = 1195 intel_translate_compare_func(stencil->Function[b]); 1196 ds->BackfaceStencilFailOp = 1197 intel_translate_stencil_op(stencil->FailFunc[b]); 1198 ds->BackfaceStencilPassDepthPassOp = 1199 intel_translate_stencil_op(stencil->ZPassFunc[b]); 1200 ds->BackfaceStencilPassDepthFailOp = 1201 intel_translate_stencil_op(stencil->ZFailFunc[b]); 1202 } 1203 1204 #if GEN_GEN <= 5 || GEN_GEN >= 9 1205 ds->StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0); 1206 ds->BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b); 1207 #endif 1208 } 1209 } 1210 1211 #if GEN_GEN >= 6 1212 static void 1213 genX(upload_depth_stencil_state)(struct brw_context *brw) 1214 { 1215 #if GEN_GEN >= 8 1216 brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) { 1217 set_depth_stencil_bits(brw, &wmds); 1218 } 1219 #else 1220 uint32_t ds_offset; 1221 brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, ds) { 1222 set_depth_stencil_bits(brw, &ds); 1223 } 1224 1225 /* Now upload a pointer to the indirect state */ 1226 #if GEN_GEN == 6 1227 brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { 1228 ptr.PointertoDEPTH_STENCIL_STATE = ds_offset; 1229 ptr.DEPTH_STENCIL_STATEChange = true; 1230 } 1231 #else 1232 brw_batch_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) { 1233 ptr.PointertoDEPTH_STENCIL_STATE = ds_offset; 1234 } 1235 #endif 1236 #endif 1237 } 1238 1239 static const struct brw_tracked_state genX(depth_stencil_state) = { 1240 .dirty = { 1241 .mesa = _NEW_BUFFERS | 1242 _NEW_DEPTH | 1243 _NEW_STENCIL, 1244 .brw = BRW_NEW_BLORP | 1245 (GEN_GEN >= 8 ? 
BRW_NEW_CONTEXT 1246 : BRW_NEW_BATCH | 1247 BRW_NEW_STATE_BASE_ADDRESS), 1248 }, 1249 .emit = genX(upload_depth_stencil_state), 1250 }; 1251 #endif 1252 1253 /* ---------------------------------------------------------------------- */ 1254 1255 #if GEN_GEN <= 5 1256 1257 static void 1258 genX(upload_clip_state)(struct brw_context *brw) 1259 { 1260 struct gl_context *ctx = &brw->ctx; 1261 1262 ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; 1263 brw_state_emit(brw, GENX(CLIP_STATE), 32, &brw->clip.state_offset, clip) { 1264 clip.KernelStartPointer = KSP(brw, brw->clip.prog_offset); 1265 clip.GRFRegisterCount = 1266 DIV_ROUND_UP(brw->clip.prog_data->total_grf, 16) - 1; 1267 clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate; 1268 clip.SingleProgramFlow = true; 1269 clip.VertexURBEntryReadLength = brw->clip.prog_data->urb_read_length; 1270 clip.ConstantURBEntryReadLength = brw->clip.prog_data->curb_read_length; 1271 1272 /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ 1273 clip.ConstantURBEntryReadOffset = brw->curbe.clip_start * 2; 1274 clip.DispatchGRFStartRegisterForURBData = 1; 1275 clip.VertexURBEntryReadOffset = 0; 1276 1277 /* BRW_NEW_URB_FENCE */ 1278 clip.NumberofURBEntries = brw->urb.nr_clip_entries; 1279 clip.URBEntryAllocationSize = brw->urb.vsize - 1; 1280 1281 if (brw->urb.nr_clip_entries >= 10) { 1282 /* Half of the URB entries go to each thread, and it has to be an 1283 * even number. 1284 */ 1285 assert(brw->urb.nr_clip_entries % 2 == 0); 1286 1287 /* Although up to 16 concurrent Clip threads are allowed on Ironlake, 1288 * only 2 threads can output VUEs at a time. 1289 */ 1290 clip.MaximumNumberofThreads = (GEN_GEN == 5 ? 
16 : 2) - 1; 1291 } else { 1292 assert(brw->urb.nr_clip_entries >= 5); 1293 clip.MaximumNumberofThreads = 1 - 1; 1294 } 1295 1296 clip.VertexPositionSpace = VPOS_NDCSPACE; 1297 clip.UserClipFlagsMustClipEnable = true; 1298 clip.GuardbandClipTestEnable = true; 1299 1300 clip.ClipperViewportStatePointer = 1301 ro_bo(brw->batch.state.bo, brw->clip.vp_offset); 1302 1303 clip.ScreenSpaceViewportXMin = -1; 1304 clip.ScreenSpaceViewportXMax = 1; 1305 clip.ScreenSpaceViewportYMin = -1; 1306 clip.ScreenSpaceViewportYMax = 1; 1307 1308 clip.ViewportXYClipTestEnable = true; 1309 clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp; 1310 1311 /* _NEW_TRANSFORM */ 1312 if (GEN_GEN == 5 || GEN_IS_G4X) { 1313 clip.UserClipDistanceClipTestEnableBitmask = 1314 ctx->Transform.ClipPlanesEnabled; 1315 } else { 1316 /* Up to 6 actual clip flags, plus the 7th for the negative RHW 1317 * workaround. 1318 */ 1319 clip.UserClipDistanceClipTestEnableBitmask = 1320 (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40; 1321 } 1322 1323 if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) 1324 clip.APIMode = APIMODE_D3D; 1325 else 1326 clip.APIMode = APIMODE_OGL; 1327 1328 clip.GuardbandClipTestEnable = true; 1329 1330 clip.ClipMode = brw->clip.prog_data->clip_mode; 1331 1332 #if GEN_IS_G4X 1333 clip.NegativeWClipTestEnable = true; 1334 #endif 1335 } 1336 } 1337 1338 const struct brw_tracked_state genX(clip_state) = { 1339 .dirty = { 1340 .mesa = _NEW_TRANSFORM | 1341 _NEW_VIEWPORT, 1342 .brw = BRW_NEW_BATCH | 1343 BRW_NEW_BLORP | 1344 BRW_NEW_CLIP_PROG_DATA | 1345 BRW_NEW_PUSH_CONSTANT_ALLOCATION | 1346 BRW_NEW_PROGRAM_CACHE | 1347 BRW_NEW_URB_FENCE, 1348 }, 1349 .emit = genX(upload_clip_state), 1350 }; 1351 1352 #else 1353 1354 static void 1355 genX(upload_clip_state)(struct brw_context *brw) 1356 { 1357 struct gl_context *ctx = &brw->ctx; 1358 1359 /* _NEW_BUFFERS */ 1360 struct gl_framebuffer *fb = ctx->DrawBuffer; 1361 1362 /* BRW_NEW_FS_PROG_DATA */ 1363 struct brw_wm_prog_data 
*wm_prog_data = 1364 brw_wm_prog_data(brw->wm.base.prog_data); 1365 1366 brw_batch_emit(brw, GENX(3DSTATE_CLIP), clip) { 1367 clip.StatisticsEnable = !brw->meta_in_progress; 1368 1369 if (wm_prog_data->barycentric_interp_modes & 1370 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) 1371 clip.NonPerspectiveBarycentricEnable = true; 1372 1373 #if GEN_GEN >= 7 1374 clip.EarlyCullEnable = true; 1375 #endif 1376 1377 #if GEN_GEN == 7 1378 clip.FrontWinding = brw->polygon_front_bit == _mesa_is_user_fbo(fb); 1379 1380 if (ctx->Polygon.CullFlag) { 1381 switch (ctx->Polygon.CullFaceMode) { 1382 case GL_FRONT: 1383 clip.CullMode = CULLMODE_FRONT; 1384 break; 1385 case GL_BACK: 1386 clip.CullMode = CULLMODE_BACK; 1387 break; 1388 case GL_FRONT_AND_BACK: 1389 clip.CullMode = CULLMODE_BOTH; 1390 break; 1391 default: 1392 unreachable("Should not get here: invalid CullFlag"); 1393 } 1394 } else { 1395 clip.CullMode = CULLMODE_NONE; 1396 } 1397 #endif 1398 1399 #if GEN_GEN < 8 1400 clip.UserClipDistanceCullTestEnableBitmask = 1401 brw_vue_prog_data(brw->vs.base.prog_data)->cull_distance_mask; 1402 1403 clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp; 1404 #endif 1405 1406 /* _NEW_LIGHT */ 1407 if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { 1408 clip.TriangleStripListProvokingVertexSelect = 0; 1409 clip.TriangleFanProvokingVertexSelect = 1; 1410 clip.LineStripListProvokingVertexSelect = 0; 1411 } else { 1412 clip.TriangleStripListProvokingVertexSelect = 2; 1413 clip.TriangleFanProvokingVertexSelect = 2; 1414 clip.LineStripListProvokingVertexSelect = 1; 1415 } 1416 1417 /* _NEW_TRANSFORM */ 1418 clip.UserClipDistanceClipTestEnableBitmask = 1419 ctx->Transform.ClipPlanesEnabled; 1420 1421 #if GEN_GEN >= 8 1422 clip.ForceUserClipDistanceClipTestEnableBitmask = true; 1423 #endif 1424 1425 if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) 1426 clip.APIMode = APIMODE_D3D; 1427 else 1428 clip.APIMode = APIMODE_OGL; 1429 1430 clip.GuardbandClipTestEnable = true; 1431 1432 
/* BRW_NEW_VIEWPORT_COUNT */ 1433 const unsigned viewport_count = brw->clip.viewport_count; 1434 1435 if (ctx->RasterDiscard) { 1436 clip.ClipMode = CLIPMODE_REJECT_ALL; 1437 #if GEN_GEN == 6 1438 perf_debug("Rasterizer discard is currently implemented via the " 1439 "clipper; having the GS not write primitives would " 1440 "likely be faster.\n"); 1441 #endif 1442 } else { 1443 clip.ClipMode = CLIPMODE_NORMAL; 1444 } 1445 1446 clip.ClipEnable = true; 1447 1448 /* _NEW_POLYGON, 1449 * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE 1450 */ 1451 if (!brw_is_drawing_points(brw) && !brw_is_drawing_lines(brw)) 1452 clip.ViewportXYClipTestEnable = true; 1453 1454 clip.MinimumPointWidth = 0.125; 1455 clip.MaximumPointWidth = 255.875; 1456 clip.MaximumVPIndex = viewport_count - 1; 1457 if (_mesa_geometric_layers(fb) == 0) 1458 clip.ForceZeroRTAIndexEnable = true; 1459 } 1460 } 1461 1462 static const struct brw_tracked_state genX(clip_state) = { 1463 .dirty = { 1464 .mesa = _NEW_BUFFERS | 1465 _NEW_LIGHT | 1466 _NEW_POLYGON | 1467 _NEW_TRANSFORM, 1468 .brw = BRW_NEW_BLORP | 1469 BRW_NEW_CONTEXT | 1470 BRW_NEW_FS_PROG_DATA | 1471 BRW_NEW_GS_PROG_DATA | 1472 BRW_NEW_VS_PROG_DATA | 1473 BRW_NEW_META_IN_PROGRESS | 1474 BRW_NEW_PRIMITIVE | 1475 BRW_NEW_RASTERIZER_DISCARD | 1476 BRW_NEW_TES_PROG_DATA | 1477 BRW_NEW_VIEWPORT_COUNT, 1478 }, 1479 .emit = genX(upload_clip_state), 1480 }; 1481 #endif 1482 1483 /* ---------------------------------------------------------------------- */ 1484 1485 static void 1486 genX(upload_sf)(struct brw_context *brw) 1487 { 1488 struct gl_context *ctx = &brw->ctx; 1489 float point_size; 1490 1491 #if GEN_GEN <= 7 1492 /* _NEW_BUFFERS */ 1493 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); 1494 UNUSED const bool multisampled_fbo = 1495 _mesa_geometric_samples(ctx->DrawBuffer) > 1; 1496 #endif 1497 1498 #if GEN_GEN < 6 1499 const struct brw_sf_prog_data *sf_prog_data = brw->sf.prog_data; 1500 1501 ctx->NewDriverState |= 
BRW_NEW_GEN4_UNIT_STATE; 1502 1503 brw_state_emit(brw, GENX(SF_STATE), 64, &brw->sf.state_offset, sf) { 1504 sf.KernelStartPointer = KSP(brw, brw->sf.prog_offset); 1505 sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate; 1506 sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1; 1507 sf.DispatchGRFStartRegisterForURBData = 3; 1508 sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; 1509 sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length; 1510 sf.NumberofURBEntries = brw->urb.nr_sf_entries; 1511 sf.URBEntryAllocationSize = brw->urb.sfsize - 1; 1512 1513 /* STATE_PREFETCH command description describes this state as being 1514 * something loaded through the GPE (L2 ISC), so it's INSTRUCTION 1515 * domain. 1516 */ 1517 sf.SetupViewportStateOffset = 1518 ro_bo(brw->batch.state.bo, brw->sf.vp_offset); 1519 1520 sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT; 1521 1522 /* sf.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; */ 1523 /* sf.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; */ 1524 1525 sf.MaximumNumberofThreads = 1526 MIN2(GEN_GEN == 5 ? 
48 : 24, brw->urb.nr_sf_entries) - 1; 1527 1528 sf.SpritePointEnable = ctx->Point.PointSprite; 1529 1530 sf.DestinationOriginHorizontalBias = 0.5; 1531 sf.DestinationOriginVerticalBias = 0.5; 1532 #else 1533 brw_batch_emit(brw, GENX(3DSTATE_SF), sf) { 1534 sf.StatisticsEnable = true; 1535 #endif 1536 sf.ViewportTransformEnable = true; 1537 1538 #if GEN_GEN == 7 1539 /* _NEW_BUFFERS */ 1540 sf.DepthBufferSurfaceFormat = brw_depthbuffer_format(brw); 1541 #endif 1542 1543 #if GEN_GEN <= 7 1544 /* _NEW_POLYGON */ 1545 sf.FrontWinding = brw->polygon_front_bit == render_to_fbo; 1546 #if GEN_GEN >= 6 1547 sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill; 1548 sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine; 1549 sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint; 1550 1551 switch (ctx->Polygon.FrontMode) { 1552 case GL_FILL: 1553 sf.FrontFaceFillMode = FILL_MODE_SOLID; 1554 break; 1555 case GL_LINE: 1556 sf.FrontFaceFillMode = FILL_MODE_WIREFRAME; 1557 break; 1558 case GL_POINT: 1559 sf.FrontFaceFillMode = FILL_MODE_POINT; 1560 break; 1561 default: 1562 unreachable("not reached"); 1563 } 1564 1565 switch (ctx->Polygon.BackMode) { 1566 case GL_FILL: 1567 sf.BackFaceFillMode = FILL_MODE_SOLID; 1568 break; 1569 case GL_LINE: 1570 sf.BackFaceFillMode = FILL_MODE_WIREFRAME; 1571 break; 1572 case GL_POINT: 1573 sf.BackFaceFillMode = FILL_MODE_POINT; 1574 break; 1575 default: 1576 unreachable("not reached"); 1577 } 1578 1579 if (multisampled_fbo && ctx->Multisample.Enabled) 1580 sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 1581 1582 sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2; 1583 sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor; 1584 sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp; 1585 #endif 1586 1587 sf.ScissorRectangleEnable = true; 1588 1589 if (ctx->Polygon.CullFlag) { 1590 switch (ctx->Polygon.CullFaceMode) { 1591 case GL_FRONT: 1592 sf.CullMode = CULLMODE_FRONT; 1593 break; 1594 case GL_BACK: 1595 
sf.CullMode = CULLMODE_BACK; 1596 break; 1597 case GL_FRONT_AND_BACK: 1598 sf.CullMode = CULLMODE_BOTH; 1599 break; 1600 default: 1601 unreachable("not reached"); 1602 } 1603 } else { 1604 sf.CullMode = CULLMODE_NONE; 1605 } 1606 1607 #if GEN_IS_HASWELL 1608 sf.LineStippleEnable = ctx->Line.StippleFlag; 1609 #endif 1610 1611 #endif 1612 1613 /* _NEW_LINE */ 1614 #if GEN_GEN == 8 1615 const struct gen_device_info *devinfo = &brw->screen->devinfo; 1616 1617 if (devinfo->is_cherryview) 1618 sf.CHVLineWidth = brw_get_line_width(brw); 1619 else 1620 sf.LineWidth = brw_get_line_width(brw); 1621 #else 1622 sf.LineWidth = brw_get_line_width(brw); 1623 #endif 1624 1625 if (ctx->Line.SmoothFlag) { 1626 sf.LineEndCapAntialiasingRegionWidth = _10pixels; 1627 #if GEN_GEN <= 7 1628 sf.AntiAliasingEnable = true; 1629 #endif 1630 } 1631 1632 /* _NEW_POINT - Clamp to ARB_point_parameters user limits */ 1633 point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); 1634 /* Clamp to the hardware limits */ 1635 sf.PointWidth = CLAMP(point_size, 0.125f, 255.875f); 1636 1637 /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */ 1638 if (use_state_point_size(brw)) 1639 sf.PointWidthSource = State; 1640 1641 #if GEN_GEN >= 8 1642 /* _NEW_POINT | _NEW_MULTISAMPLE */ 1643 if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) && 1644 !ctx->Point.PointSprite) 1645 sf.SmoothPointEnable = true; 1646 #endif 1647 1648 #if GEN_GEN == 10 1649 /* _NEW_BUFFERS 1650 * Smooth Point Enable bit MUST not be set when NUM_MULTISAMPLES > 1. 
1651 */ 1652 const bool multisampled_fbo = 1653 _mesa_geometric_samples(ctx->DrawBuffer) > 1; 1654 if (multisampled_fbo) 1655 sf.SmoothPointEnable = false; 1656 #endif 1657 1658 #if GEN_IS_G4X || GEN_GEN >= 5 1659 sf.AALineDistanceMode = AALINEDISTANCE_TRUE; 1660 #endif 1661 1662 /* _NEW_LIGHT */ 1663 if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { 1664 sf.TriangleStripListProvokingVertexSelect = 2; 1665 sf.TriangleFanProvokingVertexSelect = 2; 1666 sf.LineStripListProvokingVertexSelect = 1; 1667 } else { 1668 sf.TriangleFanProvokingVertexSelect = 1; 1669 } 1670 1671 #if GEN_GEN == 6 1672 /* BRW_NEW_FS_PROG_DATA */ 1673 const struct brw_wm_prog_data *wm_prog_data = 1674 brw_wm_prog_data(brw->wm.base.prog_data); 1675 1676 sf.AttributeSwizzleEnable = true; 1677 sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; 1678 1679 /* 1680 * Window coordinates in an FBO are inverted, which means point 1681 * sprite origin must be inverted, too. 1682 */ 1683 if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) { 1684 sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT; 1685 } else { 1686 sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT; 1687 } 1688 1689 /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM | 1690 * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA 1691 */ 1692 uint32_t urb_entry_read_length; 1693 uint32_t urb_entry_read_offset; 1694 uint32_t point_sprite_enables; 1695 genX(calculate_attr_overrides)(brw, sf.Attribute, &point_sprite_enables, 1696 &urb_entry_read_length, 1697 &urb_entry_read_offset); 1698 sf.VertexURBEntryReadLength = urb_entry_read_length; 1699 sf.VertexURBEntryReadOffset = urb_entry_read_offset; 1700 sf.PointSpriteTextureCoordinateEnable = point_sprite_enables; 1701 sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs; 1702 #endif 1703 } 1704 } 1705 1706 static const struct brw_tracked_state genX(sf_state) = { 1707 .dirty = { 1708 .mesa = _NEW_LIGHT | 1709 _NEW_LINE | 1710 _NEW_POINT | 
1711 _NEW_PROGRAM | 1712 (GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0) | 1713 (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0) | 1714 (GEN_GEN == 10 ? _NEW_BUFFERS : 0), 1715 .brw = BRW_NEW_BLORP | 1716 BRW_NEW_VUE_MAP_GEOM_OUT | 1717 (GEN_GEN <= 5 ? BRW_NEW_BATCH | 1718 BRW_NEW_PROGRAM_CACHE | 1719 BRW_NEW_SF_PROG_DATA | 1720 BRW_NEW_SF_VP | 1721 BRW_NEW_URB_FENCE 1722 : 0) | 1723 (GEN_GEN >= 6 ? BRW_NEW_CONTEXT : 0) | 1724 (GEN_GEN >= 6 && GEN_GEN <= 7 ? 1725 BRW_NEW_GS_PROG_DATA | 1726 BRW_NEW_PRIMITIVE | 1727 BRW_NEW_TES_PROG_DATA 1728 : 0) | 1729 (GEN_GEN == 6 ? BRW_NEW_FS_PROG_DATA | 1730 BRW_NEW_FRAGMENT_PROGRAM 1731 : 0), 1732 }, 1733 .emit = genX(upload_sf), 1734 }; 1735 1736 /* ---------------------------------------------------------------------- */ 1737 1738 static bool 1739 brw_color_buffer_write_enabled(struct brw_context *brw) 1740 { 1741 struct gl_context *ctx = &brw->ctx; 1742 /* BRW_NEW_FRAGMENT_PROGRAM */ 1743 const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; 1744 unsigned i; 1745 1746 /* _NEW_BUFFERS */ 1747 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { 1748 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; 1749 uint64_t outputs_written = fp->info.outputs_written; 1750 1751 /* _NEW_COLOR */ 1752 if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) || 1753 outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) && 1754 (ctx->Color.ColorMask[i][0] || 1755 ctx->Color.ColorMask[i][1] || 1756 ctx->Color.ColorMask[i][2] || 1757 ctx->Color.ColorMask[i][3])) { 1758 return true; 1759 } 1760 } 1761 1762 return false; 1763 } 1764 1765 static void 1766 genX(upload_wm)(struct brw_context *brw) 1767 { 1768 struct gl_context *ctx = &brw->ctx; 1769 1770 /* BRW_NEW_FS_PROG_DATA */ 1771 const struct brw_wm_prog_data *wm_prog_data = 1772 brw_wm_prog_data(brw->wm.base.prog_data); 1773 1774 UNUSED bool writes_depth = 1775 wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; 1776 UNUSED struct brw_stage_state 
*stage_state = &brw->wm.base; 1777 UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo; 1778 1779 #if GEN_GEN == 6 1780 /* We can't fold this into gen6_upload_wm_push_constants(), because 1781 * according to the SNB PRM, vol 2 part 1 section 7.2.2 1782 * (3DSTATE_CONSTANT_PS [DevSNB]): 1783 * 1784 * "[DevSNB]: This packet must be followed by WM_STATE." 1785 */ 1786 brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_PS), wmcp) { 1787 if (wm_prog_data->base.nr_params != 0) { 1788 wmcp.Buffer0Valid = true; 1789 /* Pointer to the WM constant buffer. Covered by the set of 1790 * state flags from gen6_upload_wm_push_constants. 1791 */ 1792 wmcp.PointertoPSConstantBuffer0 = stage_state->push_const_offset; 1793 wmcp.PSConstantBuffer0ReadLength = stage_state->push_const_size - 1; 1794 } 1795 } 1796 #endif 1797 1798 #if GEN_GEN >= 6 1799 brw_batch_emit(brw, GENX(3DSTATE_WM), wm) { 1800 wm.LineAntialiasingRegionWidth = _10pixels; 1801 wm.LineEndCapAntialiasingRegionWidth = _05pixels; 1802 1803 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; 1804 wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes; 1805 #else 1806 ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; 1807 brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) { 1808 if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) { 1809 /* These two fields should be the same pre-gen6, which is why we 1810 * only have one hardware field to program for both dispatch 1811 * widths. 
1812 */ 1813 assert(wm_prog_data->base.dispatch_grf_start_reg == 1814 wm_prog_data->dispatch_grf_start_reg_2); 1815 } 1816 1817 if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) 1818 wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0; 1819 1820 if (stage_state->sampler_count) 1821 wm.SamplerStatePointer = 1822 ro_bo(brw->batch.state.bo, stage_state->sampler_offset); 1823 #if GEN_GEN == 5 1824 if (wm_prog_data->prog_offset_2) 1825 wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2; 1826 #endif 1827 1828 wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2; 1829 wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length; 1830 /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ 1831 wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2; 1832 wm.EarlyDepthTestEnable = true; 1833 wm.LineAntialiasingRegionWidth = _05pixels; 1834 wm.LineEndCapAntialiasingRegionWidth = _10pixels; 1835 1836 /* _NEW_POLYGON */ 1837 if (ctx->Polygon.OffsetFill) { 1838 wm.GlobalDepthOffsetEnable = true; 1839 /* Something weird going on with legacy_global_depth_bias, 1840 * offset_constant, scaling and MRD. This value passes glean 1841 * but gives some odd results elsewere (eg. the 1842 * quad-offset-units test). 1843 */ 1844 wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2; 1845 1846 /* This is the only value that passes glean: 1847 */ 1848 wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor; 1849 } 1850 1851 wm.DepthCoefficientURBReadOffset = 1; 1852 #endif 1853 1854 /* BRW_NEW_STATS_WM */ 1855 wm.StatisticsEnable = GEN_GEN >= 6 || brw->stats_wm; 1856 1857 #if GEN_GEN < 7 1858 if (wm_prog_data->base.use_alt_mode) 1859 wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate; 1860 1861 wm.SamplerCount = GEN_GEN == 5 ? 
1862 0 : DIV_ROUND_UP(stage_state->sampler_count, 4); 1863 1864 wm.BindingTableEntryCount = 1865 wm_prog_data->base.binding_table.size_bytes / 4; 1866 wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1; 1867 wm._8PixelDispatchEnable = wm_prog_data->dispatch_8; 1868 wm._16PixelDispatchEnable = wm_prog_data->dispatch_16; 1869 wm.DispatchGRFStartRegisterForConstantSetupData0 = 1870 wm_prog_data->base.dispatch_grf_start_reg; 1871 if (GEN_GEN == 6 || 1872 wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) { 1873 wm.KernelStartPointer0 = KSP(brw, stage_state->prog_offset); 1874 } 1875 1876 #if GEN_GEN >= 5 1877 if (GEN_GEN == 6 || wm_prog_data->prog_offset_2) { 1878 wm.KernelStartPointer2 = 1879 KSP(brw, stage_state->prog_offset + wm_prog_data->prog_offset_2); 1880 } 1881 #endif 1882 1883 #if GEN_GEN == 6 1884 wm.DualSourceBlendEnable = 1885 wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) && 1886 ctx->Color.Blend[0]._UsesDualSrc; 1887 wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 1888 wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; 1889 1890 /* From the SNB PRM, volume 2 part 1, page 281: 1891 * "If the PS kernel does not need the Position XY Offsets 1892 * to compute a Position XY value, then this field should be 1893 * programmed to POSOFFSET_NONE." 1894 * 1895 * "SW Recommendation: If the PS kernel needs the Position Offsets 1896 * to compute a Position XY value, this field should match Position 1897 * ZW Interpolation Mode to ensure a consistent position.xyzw 1898 * computation." 1899 * We only require XY sample offsets. So, this recommendation doesn't 1900 * look useful at the moment. We might need this in future. 
1901 */ 1902 if (wm_prog_data->uses_pos_offset) 1903 wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE; 1904 else 1905 wm.PositionXYOffsetSelect = POSOFFSET_NONE; 1906 1907 wm.DispatchGRFStartRegisterForConstantSetupData2 = 1908 wm_prog_data->dispatch_grf_start_reg_2; 1909 #endif 1910 1911 if (wm_prog_data->base.total_scratch) { 1912 wm.ScratchSpaceBasePointer = rw_bo(stage_state->scratch_bo, 0); 1913 wm.PerThreadScratchSpace = 1914 ffs(stage_state->per_thread_scratch) - 11; 1915 } 1916 1917 wm.PixelShaderComputedDepth = writes_depth; 1918 #endif 1919 1920 /* _NEW_LINE */ 1921 wm.LineStippleEnable = ctx->Line.StippleFlag; 1922 1923 /* _NEW_POLYGON */ 1924 wm.PolygonStippleEnable = ctx->Polygon.StippleFlag; 1925 1926 #if GEN_GEN < 8 1927 1928 #if GEN_GEN >= 6 1929 wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; 1930 1931 /* _NEW_BUFFERS */ 1932 const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; 1933 1934 if (multisampled_fbo) { 1935 /* _NEW_MULTISAMPLE */ 1936 if (ctx->Multisample.Enabled) 1937 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 1938 else 1939 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 1940 1941 if (wm_prog_data->persample_dispatch) 1942 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 1943 else 1944 wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL; 1945 } else { 1946 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 1947 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 1948 } 1949 #endif 1950 wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; 1951 if (wm_prog_data->uses_kill || 1952 _mesa_is_alpha_test_enabled(ctx) || 1953 _mesa_is_alpha_to_coverage_enabled(ctx) || 1954 (GEN_GEN >= 6 && wm_prog_data->uses_omask)) { 1955 wm.PixelShaderKillsPixel = true; 1956 } 1957 1958 /* _NEW_BUFFERS | _NEW_COLOR */ 1959 if (brw_color_buffer_write_enabled(brw) || writes_depth || 1960 wm.PixelShaderKillsPixel || 1961 (GEN_GEN >= 6 && wm_prog_data->has_side_effects)) { 1962 wm.ThreadDispatchEnable = 
true; 1963 } 1964 1965 #if GEN_GEN >= 7 1966 wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; 1967 wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; 1968 #endif 1969 1970 /* The "UAV access enable" bits are unnecessary on HSW because they only 1971 * seem to have an effect on the HW-assisted coherency mechanism which we 1972 * don't need, and the rasterization-related UAV_ONLY flag and the 1973 * DISPATCH_ENABLE bit can be set independently from it. 1974 * C.f. gen8_upload_ps_extra(). 1975 * 1976 * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | 1977 * _NEW_COLOR 1978 */ 1979 #if GEN_IS_HASWELL 1980 if (!(brw_color_buffer_write_enabled(brw) || writes_depth) && 1981 wm_prog_data->has_side_effects) 1982 wm.PSUAVonly = ON; 1983 #endif 1984 #endif 1985 1986 #if GEN_GEN >= 7 1987 /* BRW_NEW_FS_PROG_DATA */ 1988 if (wm_prog_data->early_fragment_tests) 1989 wm.EarlyDepthStencilControl = EDSC_PREPS; 1990 else if (wm_prog_data->has_side_effects) 1991 wm.EarlyDepthStencilControl = EDSC_PSEXEC; 1992 #endif 1993 } 1994 1995 #if GEN_GEN <= 5 1996 if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) { 1997 brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) { 1998 clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp; 1999 } 2000 2001 brw->wm.offset_clamp = ctx->Polygon.OffsetClamp; 2002 } 2003 #endif 2004 } 2005 2006 static const struct brw_tracked_state genX(wm_state) = { 2007 .dirty = { 2008 .mesa = _NEW_LINE | 2009 _NEW_POLYGON | 2010 (GEN_GEN < 8 ? _NEW_BUFFERS | 2011 _NEW_COLOR : 2012 0) | 2013 (GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0) | 2014 (GEN_GEN < 6 ? _NEW_POLYGONSTIPPLE : 0) | 2015 (GEN_GEN < 8 && GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0), 2016 .brw = BRW_NEW_BLORP | 2017 BRW_NEW_FS_PROG_DATA | 2018 (GEN_GEN < 6 ? 
BRW_NEW_PUSH_CONSTANT_ALLOCATION | 2019 BRW_NEW_FRAGMENT_PROGRAM | 2020 BRW_NEW_PROGRAM_CACHE | 2021 BRW_NEW_SAMPLER_STATE_TABLE | 2022 BRW_NEW_STATS_WM 2023 : 0) | 2024 (GEN_GEN < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT), 2025 }, 2026 .emit = genX(upload_wm), 2027 }; 2028 2029 /* ---------------------------------------------------------------------- */ 2030 2031 #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ 2032 pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \ 2033 pkt.SamplerCount = \ 2034 DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ 2035 pkt.BindingTableEntryCount = \ 2036 stage_prog_data->binding_table.size_bytes / 4; \ 2037 pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \ 2038 \ 2039 if (stage_prog_data->total_scratch) { \ 2040 pkt.ScratchSpaceBasePointer = rw_bo(stage_state->scratch_bo, 0); \ 2041 pkt.PerThreadScratchSpace = \ 2042 ffs(stage_state->per_thread_scratch) - 11; \ 2043 } \ 2044 \ 2045 pkt.DispatchGRFStartRegisterForURBData = \ 2046 stage_prog_data->dispatch_grf_start_reg; \ 2047 pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \ 2048 pkt.prefix##URBEntryReadOffset = 0; \ 2049 \ 2050 pkt.StatisticsEnable = true; \ 2051 pkt.Enable = true; 2052 2053 static void 2054 genX(upload_vs_state)(struct brw_context *brw) 2055 { 2056 UNUSED struct gl_context *ctx = &brw->ctx; 2057 const struct gen_device_info *devinfo = &brw->screen->devinfo; 2058 struct brw_stage_state *stage_state = &brw->vs.base; 2059 2060 /* BRW_NEW_VS_PROG_DATA */ 2061 const struct brw_vue_prog_data *vue_prog_data = 2062 brw_vue_prog_data(brw->vs.base.prog_data); 2063 const struct brw_stage_prog_data *stage_prog_data = &vue_prog_data->base; 2064 2065 assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 || 2066 vue_prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT); 2067 2068 #if GEN_GEN == 6 2069 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, 2070 * 3DSTATE_VS, Dword 5.0 "VS Function Enable": 2071 * 
2072 * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS 2073 * command that causes the VS Function Enable to toggle. Pipeline 2074 * flush can be executed by sending a PIPE_CONTROL command with CS 2075 * stall bit set and a post sync operation. 2076 * 2077 * We've already done such a flush at the start of state upload, so we 2078 * don't need to do another one here. 2079 */ 2080 brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), cvs) { 2081 if (stage_state->push_const_size != 0) { 2082 cvs.Buffer0Valid = true; 2083 cvs.PointertoVSConstantBuffer0 = stage_state->push_const_offset; 2084 cvs.VSConstantBuffer0ReadLength = stage_state->push_const_size - 1; 2085 } 2086 } 2087 #endif 2088 2089 if (GEN_GEN == 7 && devinfo->is_ivybridge) 2090 gen7_emit_vs_workaround_flush(brw); 2091 2092 #if GEN_GEN >= 6 2093 brw_batch_emit(brw, GENX(3DSTATE_VS), vs) { 2094 #else 2095 ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; 2096 brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) { 2097 #endif 2098 INIT_THREAD_DISPATCH_FIELDS(vs, Vertex); 2099 2100 vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; 2101 2102 #if GEN_GEN < 6 2103 vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1; 2104 vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; 2105 vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; 2106 2107 vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GEN_GEN == 5 ? 2 : 0); 2108 vs.URBEntryAllocationSize = brw->urb.vsize - 1; 2109 2110 vs.MaximumNumberofThreads = 2111 CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1; 2112 2113 vs.StatisticsEnable = false; 2114 vs.SamplerStatePointer = 2115 ro_bo(brw->batch.state.bo, stage_state->sampler_offset); 2116 #endif 2117 2118 #if GEN_GEN == 5 2119 /* Force single program flow on Ironlake. We cannot reliably get 2120 * all applications working without it. 
See: 2121 * https://bugs.freedesktop.org/show_bug.cgi?id=29172 2122 * 2123 * The most notable and reliably failing application is the Humus 2124 * demo "CelShading" 2125 */ 2126 vs.SingleProgramFlow = true; 2127 vs.SamplerCount = 0; /* hardware requirement */ 2128 #endif 2129 2130 #if GEN_GEN >= 8 2131 vs.SIMD8DispatchEnable = 2132 vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; 2133 2134 vs.UserClipDistanceCullTestEnableBitmask = 2135 vue_prog_data->cull_distance_mask; 2136 #endif 2137 } 2138 2139 #if GEN_GEN == 6 2140 /* Based on my reading of the simulator, the VS constants don't get 2141 * pulled into the VS FF unit until an appropriate pipeline flush 2142 * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds 2143 * references to them into a little FIFO. The flushes are common, 2144 * but don't reliably happen between this and a 3DPRIMITIVE, causing 2145 * the primitive to use the wrong constants. Then the FIFO 2146 * containing the constant setup gets added to again on the next 2147 * constants change, and eventually when a flush does happen the 2148 * unit is overwhelmed by constant changes and dies. 2149 * 2150 * To avoid this, send a PIPE_CONTROL down the line that will 2151 * update the unit immediately loading the constants. The flush 2152 * type bits here were those set by the STATE_BASE_ADDRESS whose 2153 * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the 2154 * bug reports that led to this workaround, and may be more than 2155 * what is strictly required to avoid the issue. 2156 */ 2157 brw_emit_pipe_control_flush(brw, 2158 PIPE_CONTROL_DEPTH_STALL | 2159 PIPE_CONTROL_INSTRUCTION_INVALIDATE | 2160 PIPE_CONTROL_STATE_CACHE_INVALIDATE); 2161 #endif 2162 } 2163 2164 static const struct brw_tracked_state genX(vs_state) = { 2165 .dirty = { 2166 .mesa = (GEN_GEN == 6 ? 
(_NEW_PROGRAM_CONSTANTS | _NEW_TRANSFORM) : 0), 2167 .brw = BRW_NEW_BATCH | 2168 BRW_NEW_BLORP | 2169 BRW_NEW_CONTEXT | 2170 BRW_NEW_VS_PROG_DATA | 2171 (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) | 2172 (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | 2173 BRW_NEW_PROGRAM_CACHE | 2174 BRW_NEW_SAMPLER_STATE_TABLE | 2175 BRW_NEW_URB_FENCE 2176 : 0), 2177 }, 2178 .emit = genX(upload_vs_state), 2179 }; 2180 2181 /* ---------------------------------------------------------------------- */ 2182 2183 static void 2184 genX(upload_cc_viewport)(struct brw_context *brw) 2185 { 2186 struct gl_context *ctx = &brw->ctx; 2187 2188 /* BRW_NEW_VIEWPORT_COUNT */ 2189 const unsigned viewport_count = brw->clip.viewport_count; 2190 2191 struct GENX(CC_VIEWPORT) ccv; 2192 uint32_t cc_vp_offset; 2193 uint32_t *cc_map = 2194 brw_state_batch(brw, 4 * GENX(CC_VIEWPORT_length) * viewport_count, 2195 32, &cc_vp_offset); 2196 2197 for (unsigned i = 0; i < viewport_count; i++) { 2198 /* _NEW_VIEWPORT | _NEW_TRANSFORM */ 2199 const struct gl_viewport_attrib *vp = &ctx->ViewportArray[i]; 2200 if (ctx->Transform.DepthClamp) { 2201 ccv.MinimumDepth = MIN2(vp->Near, vp->Far); 2202 ccv.MaximumDepth = MAX2(vp->Near, vp->Far); 2203 } else { 2204 ccv.MinimumDepth = 0.0; 2205 ccv.MaximumDepth = 1.0; 2206 } 2207 GENX(CC_VIEWPORT_pack)(NULL, cc_map, &ccv); 2208 cc_map += GENX(CC_VIEWPORT_length); 2209 } 2210 2211 #if GEN_GEN >= 7 2212 brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { 2213 ptr.CCViewportPointer = cc_vp_offset; 2214 } 2215 #elif GEN_GEN == 6 2216 brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) { 2217 vp.CCViewportStateChange = 1; 2218 vp.PointertoCC_VIEWPORT = cc_vp_offset; 2219 } 2220 #else 2221 brw->cc.vp_offset = cc_vp_offset; 2222 ctx->NewDriverState |= BRW_NEW_CC_VP; 2223 #endif 2224 } 2225 2226 const struct brw_tracked_state genX(cc_vp) = { 2227 .dirty = { 2228 .mesa = _NEW_TRANSFORM | 2229 _NEW_VIEWPORT, 2230 .brw = BRW_NEW_BATCH | 2231 
BRW_NEW_BLORP | 2232 BRW_NEW_VIEWPORT_COUNT, 2233 }, 2234 .emit = genX(upload_cc_viewport) 2235 }; 2236 2237 /* ---------------------------------------------------------------------- */ 2238 2239 static void 2240 set_scissor_bits(const struct gl_context *ctx, int i, 2241 bool render_to_fbo, unsigned fb_width, unsigned fb_height, 2242 struct GENX(SCISSOR_RECT) *sc) 2243 { 2244 int bbox[4]; 2245 2246 bbox[0] = MAX2(ctx->ViewportArray[i].X, 0); 2247 bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width); 2248 bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0); 2249 bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height); 2250 _mesa_intersect_scissor_bounding_box(ctx, i, bbox); 2251 2252 if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) { 2253 /* If the scissor was out of bounds and got clamped to 0 width/height 2254 * at the bounds, the subtraction of 1 from maximums could produce a 2255 * negative number and thus not clip anything. Instead, just provide 2256 * a min > max scissor inside the bounds, which produces the expected 2257 * no rendering. 
2258 */ 2259 sc->ScissorRectangleXMin = 1; 2260 sc->ScissorRectangleXMax = 0; 2261 sc->ScissorRectangleYMin = 1; 2262 sc->ScissorRectangleYMax = 0; 2263 } else if (render_to_fbo) { 2264 /* texmemory: Y=0=bottom */ 2265 sc->ScissorRectangleXMin = bbox[0]; 2266 sc->ScissorRectangleXMax = bbox[1] - 1; 2267 sc->ScissorRectangleYMin = bbox[2]; 2268 sc->ScissorRectangleYMax = bbox[3] - 1; 2269 } else { 2270 /* memory: Y=0=top */ 2271 sc->ScissorRectangleXMin = bbox[0]; 2272 sc->ScissorRectangleXMax = bbox[1] - 1; 2273 sc->ScissorRectangleYMin = fb_height - bbox[3]; 2274 sc->ScissorRectangleYMax = fb_height - bbox[2] - 1; 2275 } 2276 } 2277 2278 #if GEN_GEN >= 6 2279 static void 2280 genX(upload_scissor_state)(struct brw_context *brw) 2281 { 2282 struct gl_context *ctx = &brw->ctx; 2283 const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); 2284 struct GENX(SCISSOR_RECT) scissor; 2285 uint32_t scissor_state_offset; 2286 const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer); 2287 const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer); 2288 uint32_t *scissor_map; 2289 2290 /* BRW_NEW_VIEWPORT_COUNT */ 2291 const unsigned viewport_count = brw->clip.viewport_count; 2292 2293 scissor_map = brw_state_batch( 2294 brw, GENX(SCISSOR_RECT_length) * sizeof(uint32_t) * viewport_count, 2295 32, &scissor_state_offset); 2296 2297 /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ 2298 2299 /* The scissor only needs to handle the intersection of drawable and 2300 * scissor rect. Clipping to the boundaries of static shared buffers 2301 * for front/back/depth is covered by looping over cliprects in brw_draw.c. 2302 * 2303 * Note that the hardware's coordinates are inclusive, while Mesa's min is 2304 * inclusive but max is exclusive. 
2305 */ 2306 for (unsigned i = 0; i < viewport_count; i++) { 2307 set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height, &scissor); 2308 GENX(SCISSOR_RECT_pack)( 2309 NULL, scissor_map + i * GENX(SCISSOR_RECT_length), &scissor); 2310 } 2311 2312 brw_batch_emit(brw, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { 2313 ptr.ScissorRectPointer = scissor_state_offset; 2314 } 2315 } 2316 2317 static const struct brw_tracked_state genX(scissor_state) = { 2318 .dirty = { 2319 .mesa = _NEW_BUFFERS | 2320 _NEW_SCISSOR | 2321 _NEW_VIEWPORT, 2322 .brw = BRW_NEW_BATCH | 2323 BRW_NEW_BLORP | 2324 BRW_NEW_VIEWPORT_COUNT, 2325 }, 2326 .emit = genX(upload_scissor_state), 2327 }; 2328 #endif 2329 2330 /* ---------------------------------------------------------------------- */ 2331 2332 static void 2333 brw_calculate_guardband_size(uint32_t fb_width, uint32_t fb_height, 2334 float m00, float m11, float m30, float m31, 2335 float *xmin, float *xmax, 2336 float *ymin, float *ymax) 2337 { 2338 /* According to the "Vertex X,Y Clamping and Quantization" section of the 2339 * Strips and Fans documentation: 2340 * 2341 * "The vertex X and Y screen-space coordinates are also /clamped/ to the 2342 * fixed-point "guardband" range supported by the rasterization hardware" 2343 * 2344 * and 2345 * 2346 * "In almost all circumstances, if an objects vertices are actually 2347 * modified by this clamping (i.e., had X or Y coordinates outside of 2348 * the guardband extent the rendered object will not match the intended 2349 * result. Therefore software should take steps to ensure that this does 2350 * not happen - e.g., by clipping objects such that they do not exceed 2351 * these limits after the Drawing Rectangle is applied." 2352 * 2353 * I believe the fundamental restriction is that the rasterizer (in 2354 * the SF/WM stages) have a limit on the number of pixels that can be 2355 * rasterized. We need to ensure any coordinates beyond the rasterizer 2356 * limit are handled by the clipper. 
So effectively that limit becomes 2357 * the clipper's guardband size. 2358 * 2359 * It goes on to say: 2360 * 2361 * "In addition, in order to be correctly rendered, objects must have a 2362 * screenspace bounding box not exceeding 8K in the X or Y direction. 2363 * This additional restriction must also be comprehended by software, 2364 * i.e., enforced by use of clipping." 2365 * 2366 * This makes no sense. Gen7+ hardware supports 16K render targets, 2367 * and you definitely need to be able to draw polygons that fill the 2368 * surface. Our assumption is that the rasterizer was limited to 8K 2369 * on Sandybridge, which only supports 8K surfaces, and it was actually 2370 * increased to 16K on Ivybridge and later. 2371 * 2372 * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge. 2373 */ 2374 const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f; 2375 2376 if (m00 != 0 && m11 != 0) { 2377 /* First, we compute the screen-space render area */ 2378 const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00); 2379 const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00); 2380 const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11); 2381 const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11); 2382 2383 /* We want the guardband to be centered on that */ 2384 const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size; 2385 const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size; 2386 const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size; 2387 const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size; 2388 2389 /* Now we need it in native device coordinates */ 2390 const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00; 2391 const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00; 2392 const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11; 2393 const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11; 2394 2395 /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be 2396 * flipped upside-down. X should be fine though. 
2397 */ 2398 assert(ndc_gb_xmin <= ndc_gb_xmax); 2399 *xmin = ndc_gb_xmin; 2400 *xmax = ndc_gb_xmax; 2401 *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax); 2402 *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax); 2403 } else { 2404 /* The viewport scales to 0, so nothing will be rendered. */ 2405 *xmin = 0.0f; 2406 *xmax = 0.0f; 2407 *ymin = 0.0f; 2408 *ymax = 0.0f; 2409 } 2410 } 2411 2412 static void 2413 genX(upload_sf_clip_viewport)(struct brw_context *brw) 2414 { 2415 struct gl_context *ctx = &brw->ctx; 2416 float y_scale, y_bias; 2417 2418 /* BRW_NEW_VIEWPORT_COUNT */ 2419 const unsigned viewport_count = brw->clip.viewport_count; 2420 2421 /* _NEW_BUFFERS */ 2422 const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); 2423 const uint32_t fb_width = (float)_mesa_geometric_width(ctx->DrawBuffer); 2424 const uint32_t fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer); 2425 2426 #if GEN_GEN >= 7 2427 #define clv sfv 2428 struct GENX(SF_CLIP_VIEWPORT) sfv; 2429 uint32_t sf_clip_vp_offset; 2430 uint32_t *sf_clip_map = 2431 brw_state_batch(brw, GENX(SF_CLIP_VIEWPORT_length) * 4 * viewport_count, 2432 64, &sf_clip_vp_offset); 2433 #else 2434 struct GENX(SF_VIEWPORT) sfv; 2435 struct GENX(CLIP_VIEWPORT) clv; 2436 uint32_t sf_vp_offset, clip_vp_offset; 2437 uint32_t *sf_map = 2438 brw_state_batch(brw, GENX(SF_VIEWPORT_length) * 4 * viewport_count, 2439 32, &sf_vp_offset); 2440 uint32_t *clip_map = 2441 brw_state_batch(brw, GENX(CLIP_VIEWPORT_length) * 4 * viewport_count, 2442 32, &clip_vp_offset); 2443 #endif 2444 2445 /* _NEW_BUFFERS */ 2446 if (render_to_fbo) { 2447 y_scale = 1.0; 2448 y_bias = 0; 2449 } else { 2450 y_scale = -1.0; 2451 y_bias = (float)fb_height; 2452 } 2453 2454 for (unsigned i = 0; i < brw->clip.viewport_count; i++) { 2455 /* _NEW_VIEWPORT: Guardband Clipping */ 2456 float scale[3], translate[3], gb_xmin, gb_xmax, gb_ymin, gb_ymax; 2457 _mesa_get_viewport_xform(ctx, i, scale, translate); 2458 2459 sfv.ViewportMatrixElementm00 = scale[0]; 2460 
sfv.ViewportMatrixElementm11 = scale[1] * y_scale, 2461 sfv.ViewportMatrixElementm22 = scale[2], 2462 sfv.ViewportMatrixElementm30 = translate[0], 2463 sfv.ViewportMatrixElementm31 = translate[1] * y_scale + y_bias, 2464 sfv.ViewportMatrixElementm32 = translate[2], 2465 brw_calculate_guardband_size(fb_width, fb_height, 2466 sfv.ViewportMatrixElementm00, 2467 sfv.ViewportMatrixElementm11, 2468 sfv.ViewportMatrixElementm30, 2469 sfv.ViewportMatrixElementm31, 2470 &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax); 2471 2472 2473 clv.XMinClipGuardband = gb_xmin; 2474 clv.XMaxClipGuardband = gb_xmax; 2475 clv.YMinClipGuardband = gb_ymin; 2476 clv.YMaxClipGuardband = gb_ymax; 2477 2478 #if GEN_GEN < 6 2479 set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height, 2480 &sfv.ScissorRectangle); 2481 #elif GEN_GEN >= 8 2482 /* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport 2483 * The hardware will take the intersection of the drawing rectangle, 2484 * scissor rectangle, and the viewport extents. We don't need to be 2485 * smart, and can therefore just program the viewport extents. 
2486 */ 2487 const float viewport_Xmax = 2488 ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width; 2489 const float viewport_Ymax = 2490 ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height; 2491 2492 if (render_to_fbo) { 2493 sfv.XMinViewPort = ctx->ViewportArray[i].X; 2494 sfv.XMaxViewPort = viewport_Xmax - 1; 2495 sfv.YMinViewPort = ctx->ViewportArray[i].Y; 2496 sfv.YMaxViewPort = viewport_Ymax - 1; 2497 } else { 2498 sfv.XMinViewPort = ctx->ViewportArray[i].X; 2499 sfv.XMaxViewPort = viewport_Xmax - 1; 2500 sfv.YMinViewPort = fb_height - viewport_Ymax; 2501 sfv.YMaxViewPort = fb_height - ctx->ViewportArray[i].Y - 1; 2502 } 2503 #endif 2504 2505 #if GEN_GEN >= 7 2506 GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_map, &sfv); 2507 sf_clip_map += GENX(SF_CLIP_VIEWPORT_length); 2508 #else 2509 GENX(SF_VIEWPORT_pack)(NULL, sf_map, &sfv); 2510 GENX(CLIP_VIEWPORT_pack)(NULL, clip_map, &clv); 2511 sf_map += GENX(SF_VIEWPORT_length); 2512 clip_map += GENX(CLIP_VIEWPORT_length); 2513 #endif 2514 } 2515 2516 #if GEN_GEN >= 7 2517 brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { 2518 ptr.SFClipViewportPointer = sf_clip_vp_offset; 2519 } 2520 #elif GEN_GEN == 6 2521 brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) { 2522 vp.SFViewportStateChange = 1; 2523 vp.CLIPViewportStateChange = 1; 2524 vp.PointertoCLIP_VIEWPORT = clip_vp_offset; 2525 vp.PointertoSF_VIEWPORT = sf_vp_offset; 2526 } 2527 #else 2528 brw->sf.vp_offset = sf_vp_offset; 2529 brw->clip.vp_offset = clip_vp_offset; 2530 brw->ctx.NewDriverState |= BRW_NEW_SF_VP | BRW_NEW_CLIP_VP; 2531 #endif 2532 } 2533 2534 static const struct brw_tracked_state genX(sf_clip_viewport) = { 2535 .dirty = { 2536 .mesa = _NEW_BUFFERS | 2537 _NEW_VIEWPORT | 2538 (GEN_GEN <= 5 ? 
_NEW_SCISSOR : 0), 2539 .brw = BRW_NEW_BATCH | 2540 BRW_NEW_BLORP | 2541 BRW_NEW_VIEWPORT_COUNT, 2542 }, 2543 .emit = genX(upload_sf_clip_viewport), 2544 }; 2545 2546 /* ---------------------------------------------------------------------- */ 2547 2548 static void 2549 genX(upload_gs_state)(struct brw_context *brw) 2550 { 2551 UNUSED struct gl_context *ctx = &brw->ctx; 2552 UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo; 2553 const struct brw_stage_state *stage_state = &brw->gs.base; 2554 const struct gl_program *gs_prog = brw->programs[MESA_SHADER_GEOMETRY]; 2555 /* BRW_NEW_GEOMETRY_PROGRAM */ 2556 bool active = GEN_GEN >= 6 && gs_prog; 2557 2558 /* BRW_NEW_GS_PROG_DATA */ 2559 struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data; 2560 UNUSED const struct brw_vue_prog_data *vue_prog_data = 2561 brw_vue_prog_data(stage_prog_data); 2562 #if GEN_GEN >= 7 2563 const struct brw_gs_prog_data *gs_prog_data = 2564 brw_gs_prog_data(stage_prog_data); 2565 #endif 2566 2567 #if GEN_GEN == 6 2568 brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) { 2569 if (active && stage_state->push_const_size != 0) { 2570 cgs.Buffer0Valid = true; 2571 cgs.PointertoGSConstantBuffer0 = stage_state->push_const_offset; 2572 cgs.GSConstantBuffer0ReadLength = stage_state->push_const_size - 1; 2573 } 2574 } 2575 #endif 2576 2577 #if GEN_GEN == 7 && !GEN_IS_HASWELL 2578 /** 2579 * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > 2580 * Geometry > Geometry Shader > State: 2581 * 2582 * "Note: Because of corruption in IVB:GT2, software needs to flush the 2583 * whole fixed function pipeline when the GS enable changes value in 2584 * the 3DSTATE_GS." 2585 * 2586 * The hardware architects have clarified that in this context "flush the 2587 * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS 2588 * Stall" bit set. 
2589 */ 2590 if (devinfo->gt == 2 && brw->gs.enabled != active) 2591 gen7_emit_cs_stall_flush(brw); 2592 #endif 2593 2594 #if GEN_GEN >= 6 2595 brw_batch_emit(brw, GENX(3DSTATE_GS), gs) { 2596 #else 2597 ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; 2598 brw_state_emit(brw, GENX(GS_STATE), 32, &brw->ff_gs.state_offset, gs) { 2599 #endif 2600 2601 #if GEN_GEN >= 6 2602 if (active) { 2603 INIT_THREAD_DISPATCH_FIELDS(gs, Vertex); 2604 2605 #if GEN_GEN >= 7 2606 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; 2607 gs.OutputTopology = gs_prog_data->output_topology; 2608 gs.ControlDataHeaderSize = 2609 gs_prog_data->control_data_header_size_hwords; 2610 2611 gs.InstanceControl = gs_prog_data->invocations - 1; 2612 gs.DispatchMode = vue_prog_data->dispatch_mode; 2613 2614 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; 2615 2616 gs.ControlDataFormat = gs_prog_data->control_data_format; 2617 #endif 2618 2619 /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between 2620 * Ivy Bridge and Haswell. 2621 * 2622 * On Ivy Bridge, setting this bit causes the vertices of a triangle 2623 * strip to be delivered to the geometry shader in an order that does 2624 * not strictly follow the OpenGL spec, but preserves triangle 2625 * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then 2626 * the geometry shader sees triangles: 2627 * 2628 * (1, 2, 3), (2, 4, 3), (3, 4, 5) 2629 * 2630 * (Clearing the bit is even worse, because it fails to preserve 2631 * orientation). 2632 * 2633 * Triangle strips with adjacency always ordered in a way that preserves 2634 * triangle orientation but does not strictly follow the OpenGL spec, 2635 * regardless of the setting of this bit. 2636 * 2637 * On Haswell, both triangle strips and triangle strips with adjacency 2638 * are always ordered in a way that preserves triangle orientation. 2639 * Setting this bit causes the ordering to strictly follow the OpenGL 2640 * spec. 
2641 * 2642 * So in either case we want to set the bit. Unfortunately on Ivy 2643 * Bridge this will get the order close to correct but not perfect. 2644 */ 2645 gs.ReorderMode = TRAILING; 2646 gs.MaximumNumberofThreads = 2647 GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1) 2648 : (devinfo->max_gs_threads - 1); 2649 2650 #if GEN_GEN < 7 2651 gs.SOStatisticsEnable = true; 2652 if (gs_prog->info.has_transform_feedback_varyings) 2653 gs.SVBIPayloadEnable = true; 2654 2655 /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it 2656 * was previously done for gen6. 2657 * 2658 * TODO: test with both disabled to see if the HW is behaving 2659 * as expected, like in gen7. 2660 */ 2661 gs.SingleProgramFlow = true; 2662 gs.VectorMaskEnable = true; 2663 #endif 2664 2665 #if GEN_GEN >= 8 2666 gs.ExpectedVertexCount = gs_prog_data->vertices_in; 2667 2668 if (gs_prog_data->static_vertex_count != -1) { 2669 gs.StaticOutput = true; 2670 gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count; 2671 } 2672 gs.IncludeVertexHandles = vue_prog_data->include_vue_handles; 2673 2674 gs.UserClipDistanceCullTestEnableBitmask = 2675 vue_prog_data->cull_distance_mask; 2676 2677 const int urb_entry_write_offset = 1; 2678 const uint32_t urb_entry_output_length = 2679 DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) - 2680 urb_entry_write_offset; 2681 2682 gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset; 2683 gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1); 2684 #endif 2685 } 2686 #endif 2687 2688 #if GEN_GEN <= 6 2689 if (!active && brw->ff_gs.prog_active) { 2690 /* In gen6, transform feedback for the VS stage is done with an 2691 * ad-hoc GS program. This function provides the needed 3DSTATE_GS 2692 * for this. 2693 */ 2694 gs.KernelStartPointer = KSP(brw, brw->ff_gs.prog_offset); 2695 gs.SingleProgramFlow = true; 2696 gs.DispatchGRFStartRegisterForURBData = GEN_GEN == 6 ? 
2 : 1; 2697 gs.VertexURBEntryReadLength = brw->ff_gs.prog_data->urb_read_length; 2698 2699 #if GEN_GEN <= 5 2700 gs.GRFRegisterCount = 2701 DIV_ROUND_UP(brw->ff_gs.prog_data->total_grf, 16) - 1; 2702 /* BRW_NEW_URB_FENCE */ 2703 gs.NumberofURBEntries = brw->urb.nr_gs_entries; 2704 gs.URBEntryAllocationSize = brw->urb.vsize - 1; 2705 gs.MaximumNumberofThreads = brw->urb.nr_gs_entries >= 8 ? 1 : 0; 2706 gs.FloatingPointMode = FLOATING_POINT_MODE_Alternate; 2707 #else 2708 gs.Enable = true; 2709 gs.VectorMaskEnable = true; 2710 gs.SVBIPayloadEnable = true; 2711 gs.SVBIPostIncrementEnable = true; 2712 gs.SVBIPostIncrementValue = 2713 brw->ff_gs.prog_data->svbi_postincrement_value; 2714 gs.SOStatisticsEnable = true; 2715 gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1; 2716 #endif 2717 } 2718 #endif 2719 if (!active && !brw->ff_gs.prog_active) { 2720 #if GEN_GEN < 8 2721 gs.DispatchGRFStartRegisterForURBData = 1; 2722 #if GEN_GEN >= 7 2723 gs.IncludeVertexHandles = true; 2724 #endif 2725 #endif 2726 } 2727 2728 #if GEN_GEN >= 6 2729 gs.StatisticsEnable = true; 2730 #endif 2731 #if GEN_GEN == 5 || GEN_GEN == 6 2732 gs.RenderingEnabled = true; 2733 #endif 2734 #if GEN_GEN <= 5 2735 gs.MaximumVPIndex = brw->clip.viewport_count - 1; 2736 #endif 2737 } 2738 2739 #if GEN_GEN == 6 2740 brw->gs.enabled = active; 2741 #endif 2742 } 2743 2744 static const struct brw_tracked_state genX(gs_state) = { 2745 .dirty = { 2746 .mesa = (GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0), 2747 .brw = BRW_NEW_BATCH | 2748 BRW_NEW_BLORP | 2749 (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | 2750 BRW_NEW_PROGRAM_CACHE | 2751 BRW_NEW_URB_FENCE | 2752 BRW_NEW_VIEWPORT_COUNT 2753 : 0) | 2754 (GEN_GEN >= 6 ? BRW_NEW_CONTEXT | 2755 BRW_NEW_GEOMETRY_PROGRAM | 2756 BRW_NEW_GS_PROG_DATA 2757 : 0) | 2758 (GEN_GEN < 7 ? 
BRW_NEW_FF_GS_PROG_DATA : 0), 2759 }, 2760 .emit = genX(upload_gs_state), 2761 }; 2762 2763 /* ---------------------------------------------------------------------- */ 2764 2765 UNUSED static GLenum 2766 fix_dual_blend_alpha_to_one(GLenum function) 2767 { 2768 switch (function) { 2769 case GL_SRC1_ALPHA: 2770 return GL_ONE; 2771 2772 case GL_ONE_MINUS_SRC1_ALPHA: 2773 return GL_ZERO; 2774 } 2775 2776 return function; 2777 } 2778 2779 #define blend_factor(x) brw_translate_blend_factor(x) 2780 #define blend_eqn(x) brw_translate_blend_equation(x) 2781 2782 /** 2783 * Modify blend function to force destination alpha to 1.0 2784 * 2785 * If \c function specifies a blend function that uses destination alpha, 2786 * replace it with a function that hard-wires destination alpha to 1.0. This 2787 * is used when rendering to xRGB targets. 2788 */ 2789 static GLenum 2790 brw_fix_xRGB_alpha(GLenum function) 2791 { 2792 switch (function) { 2793 case GL_DST_ALPHA: 2794 return GL_ONE; 2795 2796 case GL_ONE_MINUS_DST_ALPHA: 2797 case GL_SRC_ALPHA_SATURATE: 2798 return GL_ZERO; 2799 } 2800 2801 return function; 2802 } 2803 2804 #if GEN_GEN >= 6 2805 typedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML; 2806 #else 2807 typedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML; 2808 #endif 2809 2810 UNUSED static bool 2811 set_blend_entry_bits(struct brw_context *brw, BLEND_ENTRY_GENXML *entry, int i, 2812 bool alpha_to_one) 2813 { 2814 struct gl_context *ctx = &brw->ctx; 2815 2816 /* _NEW_BUFFERS */ 2817 const struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; 2818 2819 bool independent_alpha_blend = false; 2820 2821 /* Used for implementing the following bit of GL_EXT_texture_integer: 2822 * "Per-fragment operations that require floating-point color 2823 * components, including multisample alpha operations, alpha test, 2824 * blending, and dithering, have no effect when the corresponding 2825 * colors are written to an integer color buffer." 
2826 */ 2827 const bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i); 2828 2829 const unsigned blend_enabled = GEN_GEN >= 6 ? 2830 ctx->Color.BlendEnabled & (1 << i) : ctx->Color.BlendEnabled; 2831 2832 /* _NEW_COLOR */ 2833 if (ctx->Color.ColorLogicOpEnabled) { 2834 GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format) 2835 : GL_UNSIGNED_NORMALIZED; 2836 WARN_ONCE(ctx->Color.LogicOp != GL_COPY && 2837 rb_type != GL_UNSIGNED_NORMALIZED && 2838 rb_type != GL_FLOAT, "Ignoring %s logic op on %s " 2839 "renderbuffer\n", 2840 _mesa_enum_to_string(ctx->Color.LogicOp), 2841 _mesa_enum_to_string(rb_type)); 2842 if (GEN_GEN >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) { 2843 entry->LogicOpEnable = true; 2844 entry->LogicOpFunction = 2845 intel_translate_logic_op(ctx->Color.LogicOp); 2846 } 2847 } else if (blend_enabled && !ctx->Color._AdvancedBlendMode 2848 && (GEN_GEN <= 5 || !integer)) { 2849 GLenum eqRGB = ctx->Color.Blend[i].EquationRGB; 2850 GLenum eqA = ctx->Color.Blend[i].EquationA; 2851 GLenum srcRGB = ctx->Color.Blend[i].SrcRGB; 2852 GLenum dstRGB = ctx->Color.Blend[i].DstRGB; 2853 GLenum srcA = ctx->Color.Blend[i].SrcA; 2854 GLenum dstA = ctx->Color.Blend[i].DstA; 2855 2856 if (eqRGB == GL_MIN || eqRGB == GL_MAX) 2857 srcRGB = dstRGB = GL_ONE; 2858 2859 if (eqA == GL_MIN || eqA == GL_MAX) 2860 srcA = dstA = GL_ONE; 2861 2862 /* Due to hardware limitations, the destination may have information 2863 * in an alpha channel even when the format specifies no alpha 2864 * channel. In order to avoid getting any incorrect blending due to 2865 * that alpha channel, coerce the blend factors to values that will 2866 * not read the alpha channel, but will instead use the correct 2867 * implicit value for alpha. 
2868 */ 2869 if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat, 2870 GL_TEXTURE_ALPHA_TYPE)) { 2871 srcRGB = brw_fix_xRGB_alpha(srcRGB); 2872 srcA = brw_fix_xRGB_alpha(srcA); 2873 dstRGB = brw_fix_xRGB_alpha(dstRGB); 2874 dstA = brw_fix_xRGB_alpha(dstA); 2875 } 2876 2877 /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable): 2878 * "If Dual Source Blending is enabled, this bit must be disabled." 2879 * 2880 * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO, 2881 * and leave it enabled anyway. 2882 */ 2883 if (GEN_GEN >= 6 && ctx->Color.Blend[i]._UsesDualSrc && alpha_to_one) { 2884 srcRGB = fix_dual_blend_alpha_to_one(srcRGB); 2885 srcA = fix_dual_blend_alpha_to_one(srcA); 2886 dstRGB = fix_dual_blend_alpha_to_one(dstRGB); 2887 dstA = fix_dual_blend_alpha_to_one(dstA); 2888 } 2889 2890 entry->ColorBufferBlendEnable = true; 2891 entry->DestinationBlendFactor = blend_factor(dstRGB); 2892 entry->SourceBlendFactor = blend_factor(srcRGB); 2893 entry->DestinationAlphaBlendFactor = blend_factor(dstA); 2894 entry->SourceAlphaBlendFactor = blend_factor(srcA); 2895 entry->ColorBlendFunction = blend_eqn(eqRGB); 2896 entry->AlphaBlendFunction = blend_eqn(eqA); 2897 2898 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) 2899 independent_alpha_blend = true; 2900 } 2901 2902 return independent_alpha_blend; 2903 } 2904 2905 #if GEN_GEN >= 6 2906 static void 2907 genX(upload_blend_state)(struct brw_context *brw) 2908 { 2909 struct gl_context *ctx = &brw->ctx; 2910 int size; 2911 2912 /* We need at least one BLEND_STATE written, because we might do 2913 * thread dispatch even if _NumColorDrawBuffers is 0 (for example 2914 * for computed depth or alpha test), which will do an FB write 2915 * with render target 0, which will reference BLEND_STATE[0] for 2916 * alpha test enable. 
2917 */ 2918 int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers; 2919 if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled) 2920 nr_draw_buffers = 1; 2921 2922 size = GENX(BLEND_STATE_ENTRY_length) * 4 * nr_draw_buffers; 2923 #if GEN_GEN >= 8 2924 size += GENX(BLEND_STATE_length) * 4; 2925 #endif 2926 2927 uint32_t *blend_map; 2928 blend_map = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset); 2929 2930 #if GEN_GEN >= 8 2931 struct GENX(BLEND_STATE) blend = { 0 }; 2932 { 2933 #else 2934 for (int i = 0; i < nr_draw_buffers; i++) { 2935 struct GENX(BLEND_STATE_ENTRY) entry = { 0 }; 2936 #define blend entry 2937 #endif 2938 /* OpenGL specification 3.3 (page 196), section 4.1.3 says: 2939 * "If drawbuffer zero is not NONE and the buffer it references has an 2940 * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE 2941 * operations are skipped." 2942 */ 2943 if (!(ctx->DrawBuffer->_IntegerBuffers & 0x1)) { 2944 /* _NEW_MULTISAMPLE */ 2945 if (_mesa_is_multisample_enabled(ctx)) { 2946 if (ctx->Multisample.SampleAlphaToCoverage) { 2947 blend.AlphaToCoverageEnable = true; 2948 blend.AlphaToCoverageDitherEnable = GEN_GEN >= 7; 2949 } 2950 if (ctx->Multisample.SampleAlphaToOne) 2951 blend.AlphaToOneEnable = true; 2952 } 2953 2954 /* _NEW_COLOR */ 2955 if (ctx->Color.AlphaEnabled) { 2956 blend.AlphaTestEnable = true; 2957 blend.AlphaTestFunction = 2958 intel_translate_compare_func(ctx->Color.AlphaFunc); 2959 } 2960 2961 if (ctx->Color.DitherFlag) { 2962 blend.ColorDitherEnable = true; 2963 } 2964 } 2965 2966 #if GEN_GEN >= 8 2967 for (int i = 0; i < nr_draw_buffers; i++) { 2968 struct GENX(BLEND_STATE_ENTRY) entry = { 0 }; 2969 #else 2970 { 2971 #endif 2972 blend.IndependentAlphaBlendEnable = 2973 set_blend_entry_bits(brw, &entry, i, blend.AlphaToOneEnable) || 2974 blend.IndependentAlphaBlendEnable; 2975 2976 /* See section 8.1.6 "Pre-Blend Color Clamping" of the 2977 * SandyBridge PRM Volume 2 Part 1 for HW requirements. 
2978 * 2979 * We do our ARB_color_buffer_float CLAMP_FRAGMENT_COLOR 2980 * clamping in the fragment shader. For its clamping of 2981 * blending, the spec says: 2982 * 2983 * "RESOLVED: For fixed-point color buffers, the inputs and 2984 * the result of the blending equation are clamped. For 2985 * floating-point color buffers, no clamping occurs." 2986 * 2987 * So, generally, we want clamping to the render target's range. 2988 * And, good news, the hardware tables for both pre- and 2989 * post-blend color clamping are either ignored, or any are 2990 * allowed, or clamping is required but RT range clamping is a 2991 * valid option. 2992 */ 2993 entry.PreBlendColorClampEnable = true; 2994 entry.PostBlendColorClampEnable = true; 2995 entry.ColorClampRange = COLORCLAMP_RTFORMAT; 2996 2997 entry.WriteDisableRed = !ctx->Color.ColorMask[i][0]; 2998 entry.WriteDisableGreen = !ctx->Color.ColorMask[i][1]; 2999 entry.WriteDisableBlue = !ctx->Color.ColorMask[i][2]; 3000 entry.WriteDisableAlpha = !ctx->Color.ColorMask[i][3]; 3001 3002 #if GEN_GEN >= 8 3003 GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry); 3004 #else 3005 GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[i * 2], &entry); 3006 #endif 3007 } 3008 } 3009 3010 #if GEN_GEN >= 8 3011 GENX(BLEND_STATE_pack)(NULL, blend_map, &blend); 3012 #endif 3013 3014 #if GEN_GEN < 7 3015 brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { 3016 ptr.PointertoBLEND_STATE = brw->cc.blend_state_offset; 3017 ptr.BLEND_STATEChange = true; 3018 } 3019 #else 3020 brw_batch_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) { 3021 ptr.BlendStatePointer = brw->cc.blend_state_offset; 3022 #if GEN_GEN >= 8 3023 ptr.BlendStatePointerValid = true; 3024 #endif 3025 } 3026 #endif 3027 } 3028 3029 static const struct brw_tracked_state genX(blend_state) = { 3030 .dirty = { 3031 .mesa = _NEW_BUFFERS | 3032 _NEW_COLOR | 3033 _NEW_MULTISAMPLE, 3034 .brw = BRW_NEW_BATCH | 3035 BRW_NEW_BLORP | 3036 BRW_NEW_STATE_BASE_ADDRESS, 3037 }, 
3038 .emit = genX(upload_blend_state), 3039 }; 3040 #endif 3041 3042 /* ---------------------------------------------------------------------- */ 3043 3044 #if GEN_GEN >= 7 3045 UNUSED static const uint32_t push_constant_opcodes[] = { 3046 [MESA_SHADER_VERTEX] = 21, 3047 [MESA_SHADER_TESS_CTRL] = 25, /* HS */ 3048 [MESA_SHADER_TESS_EVAL] = 26, /* DS */ 3049 [MESA_SHADER_GEOMETRY] = 22, 3050 [MESA_SHADER_FRAGMENT] = 23, 3051 [MESA_SHADER_COMPUTE] = 0, 3052 }; 3053 3054 static void 3055 genX(upload_push_constant_packets)(struct brw_context *brw) 3056 { 3057 const struct gen_device_info *devinfo = &brw->screen->devinfo; 3058 struct gl_context *ctx = &brw->ctx; 3059 3060 UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0; 3061 3062 struct brw_stage_state *stage_states[] = { 3063 &brw->vs.base, 3064 &brw->tcs.base, 3065 &brw->tes.base, 3066 &brw->gs.base, 3067 &brw->wm.base, 3068 }; 3069 3070 if (GEN_GEN == 7 && !GEN_IS_HASWELL && !devinfo->is_baytrail && 3071 stage_states[MESA_SHADER_VERTEX]->push_constants_dirty) 3072 gen7_emit_vs_workaround_flush(brw); 3073 3074 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 3075 struct brw_stage_state *stage_state = stage_states[stage]; 3076 UNUSED struct gl_program *prog = ctx->_Shader->CurrentProgram[stage]; 3077 3078 if (!stage_state->push_constants_dirty) 3079 continue; 3080 3081 brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) { 3082 pkt._3DCommandSubOpcode = push_constant_opcodes[stage]; 3083 if (stage_state->prog_data) { 3084 #if GEN_GEN >= 8 || GEN_IS_HASWELL 3085 /* The Skylake PRM contains the following restriction: 3086 * 3087 * "The driver must ensure The following case does not occur 3088 * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with 3089 * buffer 3 read length equal to zero committed followed by a 3090 * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to 3091 * zero committed." 3092 * 3093 * To avoid this, we program the buffers in the highest slots. 
3094 * This way, slot 0 is only used if slot 3 is also used. 3095 */ 3096 int n = 3; 3097 3098 for (int i = 3; i >= 0; i--) { 3099 const struct brw_ubo_range *range = 3100 &stage_state->prog_data->ubo_ranges[i]; 3101 3102 if (range->length == 0) 3103 continue; 3104 3105 const struct gl_uniform_block *block = 3106 prog->sh.UniformBlocks[range->block]; 3107 const struct gl_buffer_binding *binding = 3108 &ctx->UniformBufferBindings[block->Binding]; 3109 3110 if (binding->BufferObject == ctx->Shared->NullBufferObj) { 3111 static unsigned msg_id = 0; 3112 _mesa_gl_debug(ctx, &msg_id, MESA_DEBUG_SOURCE_API, 3113 MESA_DEBUG_TYPE_UNDEFINED, 3114 MESA_DEBUG_SEVERITY_HIGH, 3115 "UBO %d unbound, %s shader uniform data " 3116 "will be undefined.", 3117 range->block, 3118 _mesa_shader_stage_to_string(stage)); 3119 continue; 3120 } 3121 3122 assert(binding->Offset % 32 == 0); 3123 3124 struct brw_bo *bo = intel_bufferobj_buffer(brw, 3125 intel_buffer_object(binding->BufferObject), 3126 binding->Offset, range->length * 32, false); 3127 3128 pkt.ConstantBody.ReadLength[n] = range->length; 3129 pkt.ConstantBody.Buffer[n] = 3130 ro_bo(bo, range->start * 32 + binding->Offset); 3131 n--; 3132 } 3133 3134 if (stage_state->push_const_size > 0) { 3135 assert(n >= 0); 3136 pkt.ConstantBody.ReadLength[n] = stage_state->push_const_size; 3137 pkt.ConstantBody.Buffer[n] = 3138 ro_bo(stage_state->push_const_bo, 3139 stage_state->push_const_offset); 3140 } 3141 #else 3142 pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size; 3143 pkt.ConstantBody.Buffer[0].offset = 3144 stage_state->push_const_offset | mocs; 3145 #endif 3146 } 3147 } 3148 3149 stage_state->push_constants_dirty = false; 3150 brw->ctx.NewDriverState |= GEN_GEN >= 9 ? 
BRW_NEW_SURFACES : 0; 3151 } 3152 } 3153 3154 const struct brw_tracked_state genX(push_constant_packets) = { 3155 .dirty = { 3156 .mesa = 0, 3157 .brw = BRW_NEW_DRAW_CALL, 3158 }, 3159 .emit = genX(upload_push_constant_packets), 3160 }; 3161 #endif 3162 3163 #if GEN_GEN >= 6 3164 static void 3165 genX(upload_vs_push_constants)(struct brw_context *brw) 3166 { 3167 struct brw_stage_state *stage_state = &brw->vs.base; 3168 3169 /* BRW_NEW_VERTEX_PROGRAM */ 3170 const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX]; 3171 /* BRW_NEW_VS_PROG_DATA */ 3172 const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data; 3173 3174 gen6_upload_push_constants(brw, vp, prog_data, stage_state); 3175 } 3176 3177 static const struct brw_tracked_state genX(vs_push_constants) = { 3178 .dirty = { 3179 .mesa = _NEW_PROGRAM_CONSTANTS | 3180 _NEW_TRANSFORM, 3181 .brw = BRW_NEW_BATCH | 3182 BRW_NEW_BLORP | 3183 BRW_NEW_VERTEX_PROGRAM | 3184 BRW_NEW_VS_PROG_DATA, 3185 }, 3186 .emit = genX(upload_vs_push_constants), 3187 }; 3188 3189 static void 3190 genX(upload_gs_push_constants)(struct brw_context *brw) 3191 { 3192 struct brw_stage_state *stage_state = &brw->gs.base; 3193 3194 /* BRW_NEW_GEOMETRY_PROGRAM */ 3195 const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY]; 3196 3197 /* BRW_NEW_GS_PROG_DATA */ 3198 struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data; 3199 3200 gen6_upload_push_constants(brw, gp, prog_data, stage_state); 3201 } 3202 3203 static const struct brw_tracked_state genX(gs_push_constants) = { 3204 .dirty = { 3205 .mesa = _NEW_PROGRAM_CONSTANTS | 3206 _NEW_TRANSFORM, 3207 .brw = BRW_NEW_BATCH | 3208 BRW_NEW_BLORP | 3209 BRW_NEW_GEOMETRY_PROGRAM | 3210 BRW_NEW_GS_PROG_DATA, 3211 }, 3212 .emit = genX(upload_gs_push_constants), 3213 }; 3214 3215 static void 3216 genX(upload_wm_push_constants)(struct brw_context *brw) 3217 { 3218 struct brw_stage_state *stage_state = &brw->wm.base; 3219 /* BRW_NEW_FRAGMENT_PROGRAM */ 3220 const struct 
gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; 3221 /* BRW_NEW_FS_PROG_DATA */ 3222 const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; 3223 3224 gen6_upload_push_constants(brw, fp, prog_data, stage_state); 3225 } 3226 3227 static const struct brw_tracked_state genX(wm_push_constants) = { 3228 .dirty = { 3229 .mesa = _NEW_PROGRAM_CONSTANTS, 3230 .brw = BRW_NEW_BATCH | 3231 BRW_NEW_BLORP | 3232 BRW_NEW_FRAGMENT_PROGRAM | 3233 BRW_NEW_FS_PROG_DATA, 3234 }, 3235 .emit = genX(upload_wm_push_constants), 3236 }; 3237 #endif 3238 3239 /* ---------------------------------------------------------------------- */ 3240 3241 #if GEN_GEN >= 6 3242 static unsigned 3243 genX(determine_sample_mask)(struct brw_context *brw) 3244 { 3245 struct gl_context *ctx = &brw->ctx; 3246 float coverage = 1.0f; 3247 float coverage_invert = false; 3248 unsigned sample_mask = ~0u; 3249 3250 /* BRW_NEW_NUM_SAMPLES */ 3251 unsigned num_samples = brw->num_samples; 3252 3253 if (_mesa_is_multisample_enabled(ctx)) { 3254 if (ctx->Multisample.SampleCoverage) { 3255 coverage = ctx->Multisample.SampleCoverageValue; 3256 coverage_invert = ctx->Multisample.SampleCoverageInvert; 3257 } 3258 if (ctx->Multisample.SampleMask) { 3259 sample_mask = ctx->Multisample.SampleMaskValue; 3260 } 3261 } 3262 3263 if (num_samples > 1) { 3264 int coverage_int = (int) (num_samples * coverage + 0.5f); 3265 uint32_t coverage_bits = (1 << coverage_int) - 1; 3266 if (coverage_invert) 3267 coverage_bits ^= (1 << num_samples) - 1; 3268 return coverage_bits & sample_mask; 3269 } else { 3270 return 1; 3271 } 3272 } 3273 3274 static void 3275 genX(emit_3dstate_multisample2)(struct brw_context *brw, 3276 unsigned num_samples) 3277 { 3278 unsigned log2_samples = ffs(num_samples) - 1; 3279 3280 brw_batch_emit(brw, GENX(3DSTATE_MULTISAMPLE), multi) { 3281 multi.PixelLocation = CENTER; 3282 multi.NumberofMultisamples = log2_samples; 3283 #if GEN_GEN == 6 3284 GEN_SAMPLE_POS_4X(multi.Sample); 3285 #elif GEN_GEN 
== 7 3286 switch (num_samples) { 3287 case 1: 3288 GEN_SAMPLE_POS_1X(multi.Sample); 3289 break; 3290 case 2: 3291 GEN_SAMPLE_POS_2X(multi.Sample); 3292 break; 3293 case 4: 3294 GEN_SAMPLE_POS_4X(multi.Sample); 3295 break; 3296 case 8: 3297 GEN_SAMPLE_POS_8X(multi.Sample); 3298 break; 3299 default: 3300 break; 3301 } 3302 #endif 3303 } 3304 } 3305 3306 static void 3307 genX(upload_multisample_state)(struct brw_context *brw) 3308 { 3309 assert(brw->num_samples > 0 && brw->num_samples <= 16); 3310 3311 genX(emit_3dstate_multisample2)(brw, brw->num_samples); 3312 3313 brw_batch_emit(brw, GENX(3DSTATE_SAMPLE_MASK), sm) { 3314 sm.SampleMask = genX(determine_sample_mask)(brw); 3315 } 3316 } 3317 3318 static const struct brw_tracked_state genX(multisample_state) = { 3319 .dirty = { 3320 .mesa = _NEW_MULTISAMPLE | 3321 (GEN_GEN == 10 ? _NEW_BUFFERS : 0), 3322 .brw = BRW_NEW_BLORP | 3323 BRW_NEW_CONTEXT | 3324 BRW_NEW_NUM_SAMPLES, 3325 }, 3326 .emit = genX(upload_multisample_state) 3327 }; 3328 #endif 3329 3330 /* ---------------------------------------------------------------------- */ 3331 3332 static void 3333 genX(upload_color_calc_state)(struct brw_context *brw) 3334 { 3335 struct gl_context *ctx = &brw->ctx; 3336 3337 brw_state_emit(brw, GENX(COLOR_CALC_STATE), 64, &brw->cc.state_offset, cc) { 3338 #if GEN_GEN <= 5 3339 cc.IndependentAlphaBlendEnable = 3340 set_blend_entry_bits(brw, &cc, 0, false); 3341 set_depth_stencil_bits(brw, &cc); 3342 3343 if (ctx->Color.AlphaEnabled && 3344 ctx->DrawBuffer->_NumColorDrawBuffers <= 1) { 3345 cc.AlphaTestEnable = true; 3346 cc.AlphaTestFunction = 3347 intel_translate_compare_func(ctx->Color.AlphaFunc); 3348 } 3349 3350 cc.ColorDitherEnable = ctx->Color.DitherFlag; 3351 3352 cc.StatisticsEnable = brw->stats_wm; 3353 3354 cc.CCViewportStatePointer = 3355 ro_bo(brw->batch.state.bo, brw->cc.vp_offset); 3356 #else 3357 /* _NEW_COLOR */ 3358 cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0]; 3359 cc.BlendConstantColorGreen = 
ctx->Color.BlendColorUnclamped[1]; 3360 cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2]; 3361 cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3]; 3362 3363 #if GEN_GEN < 9 3364 /* _NEW_STENCIL */ 3365 cc.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0); 3366 cc.BackfaceStencilReferenceValue = 3367 _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace); 3368 #endif 3369 3370 #endif 3371 3372 /* _NEW_COLOR */ 3373 UNCLAMPED_FLOAT_TO_UBYTE(cc.AlphaReferenceValueAsUNORM8, 3374 ctx->Color.AlphaRef); 3375 } 3376 3377 #if GEN_GEN >= 6 3378 brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { 3379 ptr.ColorCalcStatePointer = brw->cc.state_offset; 3380 #if GEN_GEN != 7 3381 ptr.ColorCalcStatePointerValid = true; 3382 #endif 3383 } 3384 #else 3385 brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; 3386 #endif 3387 } 3388 3389 static const struct brw_tracked_state genX(color_calc_state) = { 3390 .dirty = { 3391 .mesa = _NEW_COLOR | 3392 _NEW_STENCIL | 3393 (GEN_GEN <= 5 ? _NEW_BUFFERS | 3394 _NEW_DEPTH 3395 : 0), 3396 .brw = BRW_NEW_BATCH | 3397 BRW_NEW_BLORP | 3398 (GEN_GEN <= 5 ? 
BRW_NEW_CC_VP | 3399 BRW_NEW_STATS_WM 3400 : BRW_NEW_CC_STATE | 3401 BRW_NEW_STATE_BASE_ADDRESS), 3402 }, 3403 .emit = genX(upload_color_calc_state), 3404 }; 3405 3406 3407 /* ---------------------------------------------------------------------- */ 3408 3409 #if GEN_GEN >= 7 3410 static void 3411 genX(upload_sbe)(struct brw_context *brw) 3412 { 3413 struct gl_context *ctx = &brw->ctx; 3414 /* BRW_NEW_FRAGMENT_PROGRAM */ 3415 UNUSED const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; 3416 /* BRW_NEW_FS_PROG_DATA */ 3417 const struct brw_wm_prog_data *wm_prog_data = 3418 brw_wm_prog_data(brw->wm.base.prog_data); 3419 #if GEN_GEN >= 8 3420 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } }; 3421 #else 3422 #define attr_overrides sbe.Attribute 3423 #endif 3424 uint32_t urb_entry_read_length; 3425 uint32_t urb_entry_read_offset; 3426 uint32_t point_sprite_enables; 3427 3428 brw_batch_emit(brw, GENX(3DSTATE_SBE), sbe) { 3429 sbe.AttributeSwizzleEnable = true; 3430 sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; 3431 3432 /* _NEW_BUFFERS */ 3433 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); 3434 3435 /* _NEW_POINT 3436 * 3437 * Window coordinates in an FBO are inverted, which means point 3438 * sprite origin must be inverted. 
3439 */ 3440 if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) 3441 sbe.PointSpriteTextureCoordinateOrigin = LOWERLEFT; 3442 else 3443 sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT; 3444 3445 /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM, 3446 * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | 3447 * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA | 3448 * BRW_NEW_VUE_MAP_GEOM_OUT 3449 */ 3450 genX(calculate_attr_overrides)(brw, 3451 attr_overrides, 3452 &point_sprite_enables, 3453 &urb_entry_read_length, 3454 &urb_entry_read_offset); 3455 3456 /* Typically, the URB entry read length and offset should be programmed 3457 * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active 3458 * stage which produces geometry. However, we don't know the proper 3459 * value until we call calculate_attr_overrides(). 3460 * 3461 * To fit with our existing code, we override the inherited values and 3462 * specify it here directly, as we did on previous generations. 
3463 */ 3464 sbe.VertexURBEntryReadLength = urb_entry_read_length; 3465 sbe.VertexURBEntryReadOffset = urb_entry_read_offset; 3466 sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables; 3467 sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs; 3468 3469 #if GEN_GEN >= 8 3470 sbe.ForceVertexURBEntryReadLength = true; 3471 sbe.ForceVertexURBEntryReadOffset = true; 3472 #endif 3473 3474 #if GEN_GEN >= 9 3475 /* prepare the active component dwords */ 3476 for (int i = 0; i < 32; i++) 3477 sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW; 3478 #endif 3479 } 3480 3481 #if GEN_GEN >= 8 3482 brw_batch_emit(brw, GENX(3DSTATE_SBE_SWIZ), sbes) { 3483 for (int i = 0; i < 16; i++) 3484 sbes.Attribute[i] = attr_overrides[i]; 3485 } 3486 #endif 3487 3488 #undef attr_overrides 3489 } 3490 3491 static const struct brw_tracked_state genX(sbe_state) = { 3492 .dirty = { 3493 .mesa = _NEW_BUFFERS | 3494 _NEW_LIGHT | 3495 _NEW_POINT | 3496 _NEW_POLYGON | 3497 _NEW_PROGRAM, 3498 .brw = BRW_NEW_BLORP | 3499 BRW_NEW_CONTEXT | 3500 BRW_NEW_FRAGMENT_PROGRAM | 3501 BRW_NEW_FS_PROG_DATA | 3502 BRW_NEW_GS_PROG_DATA | 3503 BRW_NEW_TES_PROG_DATA | 3504 BRW_NEW_VUE_MAP_GEOM_OUT | 3505 (GEN_GEN == 7 ? BRW_NEW_PRIMITIVE 3506 : 0), 3507 }, 3508 .emit = genX(upload_sbe), 3509 }; 3510 #endif 3511 3512 /* ---------------------------------------------------------------------- */ 3513 3514 #if GEN_GEN >= 7 3515 /** 3516 * Outputs the 3DSTATE_SO_DECL_LIST command. 3517 * 3518 * The data output is a series of 64-bit entries containing a SO_DECL per 3519 * stream. We only have one stream of rendering coming out of the GS unit, so 3520 * we only emit stream 0 (low 16 bits) SO_DECLs. 
3521 */ 3522 static void 3523 genX(upload_3dstate_so_decl_list)(struct brw_context *brw, 3524 const struct brw_vue_map *vue_map) 3525 { 3526 struct gl_context *ctx = &brw->ctx; 3527 /* BRW_NEW_TRANSFORM_FEEDBACK */ 3528 struct gl_transform_feedback_object *xfb_obj = 3529 ctx->TransformFeedback.CurrentObject; 3530 const struct gl_transform_feedback_info *linked_xfb_info = 3531 xfb_obj->program->sh.LinkedTransformFeedback; 3532 struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128]; 3533 int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; 3534 int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; 3535 int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; 3536 int max_decls = 0; 3537 STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS); 3538 3539 memset(so_decl, 0, sizeof(so_decl)); 3540 3541 /* Construct the list of SO_DECLs to be emitted. The formatting of the 3542 * command feels strange -- each dword pair contains a SO_DECL per stream. 3543 */ 3544 for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) { 3545 const struct gl_transform_feedback_output *output = 3546 &linked_xfb_info->Outputs[i]; 3547 const int buffer = output->OutputBuffer; 3548 const int varying = output->OutputRegister; 3549 const unsigned stream_id = output->StreamId; 3550 assert(stream_id < MAX_VERTEX_STREAMS); 3551 3552 buffer_mask[stream_id] |= 1 << buffer; 3553 3554 assert(vue_map->varying_to_slot[varying] >= 0); 3555 3556 /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[] 3557 * array. Instead, it simply increments DstOffset for the following 3558 * input by the number of components that should be skipped. 3559 * 3560 * Our hardware is unusual in that it requires us to program SO_DECLs 3561 * for fake "hole" components, rather than simply taking the offset 3562 * for each real varying. Each hole can have size 1, 2, 3, or 4; we 3563 * program as many size = 4 holes as we can, then a final hole to 3564 * accommodate the final 1, 2, or 3 remaining. 
3565 */ 3566 int skip_components = output->DstOffset - next_offset[buffer]; 3567 3568 while (skip_components > 0) { 3569 so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { 3570 .HoleFlag = 1, 3571 .OutputBufferSlot = output->OutputBuffer, 3572 .ComponentMask = (1 << MIN2(skip_components, 4)) - 1, 3573 }; 3574 skip_components -= 4; 3575 } 3576 3577 next_offset[buffer] = output->DstOffset + output->NumComponents; 3578 3579 so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { 3580 .OutputBufferSlot = output->OutputBuffer, 3581 .RegisterIndex = vue_map->varying_to_slot[varying], 3582 .ComponentMask = 3583 ((1 << output->NumComponents) - 1) << output->ComponentOffset, 3584 }; 3585 3586 if (decls[stream_id] > max_decls) 3587 max_decls = decls[stream_id]; 3588 } 3589 3590 uint32_t *dw; 3591 dw = brw_batch_emitn(brw, GENX(3DSTATE_SO_DECL_LIST), 3 + 2 * max_decls, 3592 .StreamtoBufferSelects0 = buffer_mask[0], 3593 .StreamtoBufferSelects1 = buffer_mask[1], 3594 .StreamtoBufferSelects2 = buffer_mask[2], 3595 .StreamtoBufferSelects3 = buffer_mask[3], 3596 .NumEntries0 = decls[0], 3597 .NumEntries1 = decls[1], 3598 .NumEntries2 = decls[2], 3599 .NumEntries3 = decls[3]); 3600 3601 for (int i = 0; i < max_decls; i++) { 3602 GENX(SO_DECL_ENTRY_pack)( 3603 brw, dw + 2 + i * 2, 3604 &(struct GENX(SO_DECL_ENTRY)) { 3605 .Stream0Decl = so_decl[0][i], 3606 .Stream1Decl = so_decl[1][i], 3607 .Stream2Decl = so_decl[2][i], 3608 .Stream3Decl = so_decl[3][i], 3609 }); 3610 } 3611 } 3612 3613 static void 3614 genX(upload_3dstate_so_buffers)(struct brw_context *brw) 3615 { 3616 struct gl_context *ctx = &brw->ctx; 3617 /* BRW_NEW_TRANSFORM_FEEDBACK */ 3618 struct gl_transform_feedback_object *xfb_obj = 3619 ctx->TransformFeedback.CurrentObject; 3620 #if GEN_GEN < 8 3621 const struct gl_transform_feedback_info *linked_xfb_info = 3622 xfb_obj->program->sh.LinkedTransformFeedback; 3623 #else 3624 struct brw_transform_feedback_object *brw_obj = 3625 (struct 
brw_transform_feedback_object *) xfb_obj; 3626 uint32_t mocs_wb = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; 3627 #endif 3628 3629 /* Set up the up to 4 output buffers. These are the ranges defined in the 3630 * gl_transform_feedback_object. 3631 */ 3632 for (int i = 0; i < 4; i++) { 3633 struct intel_buffer_object *bufferobj = 3634 intel_buffer_object(xfb_obj->Buffers[i]); 3635 3636 if (!bufferobj) { 3637 brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) { 3638 sob.SOBufferIndex = i; 3639 } 3640 continue; 3641 } 3642 3643 uint32_t start = xfb_obj->Offset[i]; 3644 assert(start % 4 == 0); 3645 uint32_t end = ALIGN(start + xfb_obj->Size[i], 4); 3646 struct brw_bo *bo = 3647 intel_bufferobj_buffer(brw, bufferobj, start, end - start, true); 3648 assert(end <= bo->size); 3649 3650 brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) { 3651 sob.SOBufferIndex = i; 3652 3653 sob.SurfaceBaseAddress = rw_bo(bo, start); 3654 #if GEN_GEN < 8 3655 sob.SurfacePitch = linked_xfb_info->Buffers[i].Stride * 4; 3656 sob.SurfaceEndAddress = rw_bo(bo, end); 3657 #else 3658 sob.SOBufferEnable = true; 3659 sob.StreamOffsetWriteEnable = true; 3660 sob.StreamOutputBufferOffsetAddressEnable = true; 3661 sob.SOBufferMOCS = mocs_wb; 3662 3663 sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1; 3664 sob.StreamOutputBufferOffsetAddress = 3665 rw_bo(brw_obj->offset_bo, i * sizeof(uint32_t)); 3666 3667 if (brw_obj->zero_offsets) { 3668 /* Zero out the offset and write that to offset_bo */ 3669 sob.StreamOffset = 0; 3670 } else { 3671 /* Use offset_bo as the "Stream Offset." 
*/ 3672 sob.StreamOffset = 0xFFFFFFFF; 3673 } 3674 #endif 3675 } 3676 } 3677 3678 #if GEN_GEN >= 8 3679 brw_obj->zero_offsets = false; 3680 #endif 3681 } 3682 3683 static bool 3684 query_active(struct gl_query_object *q) 3685 { 3686 return q && q->Active; 3687 } 3688 3689 static void 3690 genX(upload_3dstate_streamout)(struct brw_context *brw, bool active, 3691 const struct brw_vue_map *vue_map) 3692 { 3693 struct gl_context *ctx = &brw->ctx; 3694 /* BRW_NEW_TRANSFORM_FEEDBACK */ 3695 struct gl_transform_feedback_object *xfb_obj = 3696 ctx->TransformFeedback.CurrentObject; 3697 3698 brw_batch_emit(brw, GENX(3DSTATE_STREAMOUT), sos) { 3699 if (active) { 3700 int urb_entry_read_offset = 0; 3701 int urb_entry_read_length = (vue_map->num_slots + 1) / 2 - 3702 urb_entry_read_offset; 3703 3704 sos.SOFunctionEnable = true; 3705 sos.SOStatisticsEnable = true; 3706 3707 /* BRW_NEW_RASTERIZER_DISCARD */ 3708 if (ctx->RasterDiscard) { 3709 if (!query_active(ctx->Query.PrimitivesGenerated[0])) { 3710 sos.RenderingDisable = true; 3711 } else { 3712 perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED " 3713 "query active relies on the clipper.\n"); 3714 } 3715 } 3716 3717 /* _NEW_LIGHT */ 3718 if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) 3719 sos.ReorderMode = TRAILING; 3720 3721 #if GEN_GEN < 8 3722 sos.SOBufferEnable0 = xfb_obj->Buffers[0] != NULL; 3723 sos.SOBufferEnable1 = xfb_obj->Buffers[1] != NULL; 3724 sos.SOBufferEnable2 = xfb_obj->Buffers[2] != NULL; 3725 sos.SOBufferEnable3 = xfb_obj->Buffers[3] != NULL; 3726 #else 3727 const struct gl_transform_feedback_info *linked_xfb_info = 3728 xfb_obj->program->sh.LinkedTransformFeedback; 3729 /* Set buffer pitches; 0 means unbound. 
*/ 3730 if (xfb_obj->Buffers[0]) 3731 sos.Buffer0SurfacePitch = linked_xfb_info->Buffers[0].Stride * 4; 3732 if (xfb_obj->Buffers[1]) 3733 sos.Buffer1SurfacePitch = linked_xfb_info->Buffers[1].Stride * 4; 3734 if (xfb_obj->Buffers[2]) 3735 sos.Buffer2SurfacePitch = linked_xfb_info->Buffers[2].Stride * 4; 3736 if (xfb_obj->Buffers[3]) 3737 sos.Buffer3SurfacePitch = linked_xfb_info->Buffers[3].Stride * 4; 3738 #endif 3739 3740 /* We always read the whole vertex. This could be reduced at some 3741 * point by reading less and offsetting the register index in the 3742 * SO_DECLs. 3743 */ 3744 sos.Stream0VertexReadOffset = urb_entry_read_offset; 3745 sos.Stream0VertexReadLength = urb_entry_read_length - 1; 3746 sos.Stream1VertexReadOffset = urb_entry_read_offset; 3747 sos.Stream1VertexReadLength = urb_entry_read_length - 1; 3748 sos.Stream2VertexReadOffset = urb_entry_read_offset; 3749 sos.Stream2VertexReadLength = urb_entry_read_length - 1; 3750 sos.Stream3VertexReadOffset = urb_entry_read_offset; 3751 sos.Stream3VertexReadLength = urb_entry_read_length - 1; 3752 } 3753 } 3754 } 3755 3756 static void 3757 genX(upload_sol)(struct brw_context *brw) 3758 { 3759 struct gl_context *ctx = &brw->ctx; 3760 /* BRW_NEW_TRANSFORM_FEEDBACK */ 3761 bool active = _mesa_is_xfb_active_and_unpaused(ctx); 3762 3763 if (active) { 3764 genX(upload_3dstate_so_buffers)(brw); 3765 3766 /* BRW_NEW_VUE_MAP_GEOM_OUT */ 3767 genX(upload_3dstate_so_decl_list)(brw, &brw->vue_map_geom_out); 3768 } 3769 3770 /* Finally, set up the SOL stage. This command must always follow updates to 3771 * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or 3772 * MMIO register updates (current performed by the kernel at each batch 3773 * emit). 
3774 */ 3775 genX(upload_3dstate_streamout)(brw, active, &brw->vue_map_geom_out); 3776 } 3777 3778 static const struct brw_tracked_state genX(sol_state) = { 3779 .dirty = { 3780 .mesa = _NEW_LIGHT, 3781 .brw = BRW_NEW_BATCH | 3782 BRW_NEW_BLORP | 3783 BRW_NEW_RASTERIZER_DISCARD | 3784 BRW_NEW_VUE_MAP_GEOM_OUT | 3785 BRW_NEW_TRANSFORM_FEEDBACK, 3786 }, 3787 .emit = genX(upload_sol), 3788 }; 3789 #endif 3790 3791 /* ---------------------------------------------------------------------- */ 3792 3793 #if GEN_GEN >= 7 3794 static void 3795 genX(upload_ps)(struct brw_context *brw) 3796 { 3797 UNUSED const struct gl_context *ctx = &brw->ctx; 3798 UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo; 3799 3800 /* BRW_NEW_FS_PROG_DATA */ 3801 const struct brw_wm_prog_data *prog_data = 3802 brw_wm_prog_data(brw->wm.base.prog_data); 3803 const struct brw_stage_state *stage_state = &brw->wm.base; 3804 3805 #if GEN_GEN < 8 3806 #endif 3807 3808 brw_batch_emit(brw, GENX(3DSTATE_PS), ps) { 3809 /* Initialize the execution mask with VMask. Otherwise, derivatives are 3810 * incorrect for subspans where some of the pixels are unlit. We believe 3811 * the bit just didn't take effect in previous generations. 3812 */ 3813 ps.VectorMaskEnable = GEN_GEN >= 8; 3814 3815 ps.SamplerCount = 3816 DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); 3817 3818 /* BRW_NEW_FS_PROG_DATA */ 3819 ps.BindingTableEntryCount = prog_data->base.binding_table.size_bytes / 4; 3820 3821 if (prog_data->base.use_alt_mode) 3822 ps.FloatingPointMode = Alternate; 3823 3824 /* Haswell requires the sample mask to be set in this packet as well as 3825 * in 3DSTATE_SAMPLE_MASK; the values should match. 
3826 */ 3827 3828 /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ 3829 #if GEN_IS_HASWELL 3830 ps.SampleMask = genX(determine_sample_mask(brw)); 3831 #endif 3832 3833 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; 3834 * it implicitly scales for different GT levels (which have some # of 3835 * PSDs). 3836 * 3837 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. 3838 */ 3839 #if GEN_GEN >= 9 3840 ps.MaximumNumberofThreadsPerPSD = 64 - 1; 3841 #elif GEN_GEN >= 8 3842 ps.MaximumNumberofThreadsPerPSD = 64 - 2; 3843 #else 3844 ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; 3845 #endif 3846 3847 if (prog_data->base.nr_params > 0 || 3848 prog_data->base.ubo_ranges[0].length > 0) 3849 ps.PushConstantEnable = true; 3850 3851 #if GEN_GEN < 8 3852 /* From the IVB PRM, volume 2 part 1, page 287: 3853 * "This bit is inserted in the PS payload header and made available to 3854 * the DataPort (either via the message header or via header bypass) to 3855 * indicate that oMask data (one or two phases) is included in Render 3856 * Target Write messages. If present, the oMask data is used to mask off 3857 * samples." 3858 */ 3859 ps.oMaskPresenttoRenderTarget = prog_data->uses_omask; 3860 3861 /* The hardware wedges if you have this bit set but don't turn on any 3862 * dual source blend factors. 3863 * 3864 * BRW_NEW_FS_PROG_DATA | _NEW_COLOR 3865 */ 3866 ps.DualSourceBlendEnable = prog_data->dual_src_blend && 3867 (ctx->Color.BlendEnabled & 1) && 3868 ctx->Color.Blend[0]._UsesDualSrc; 3869 3870 /* BRW_NEW_FS_PROG_DATA */ 3871 ps.AttributeEnable = (prog_data->num_varying_inputs != 0); 3872 #endif 3873 3874 /* From the documentation for this packet: 3875 * "If the PS kernel does not need the Position XY Offsets to 3876 * compute a Position Value, then this field should be programmed 3877 * to POSOFFSET_NONE." 
3878 * 3879 * "SW Recommendation: If the PS kernel needs the Position Offsets 3880 * to compute a Position XY value, this field should match Position 3881 * ZW Interpolation Mode to ensure a consistent position.xyzw 3882 * computation." 3883 * 3884 * We only require XY sample offsets. So, this recommendation doesn't 3885 * look useful at the moment. We might need this in future. 3886 */ 3887 if (prog_data->uses_pos_offset) 3888 ps.PositionXYOffsetSelect = POSOFFSET_SAMPLE; 3889 else 3890 ps.PositionXYOffsetSelect = POSOFFSET_NONE; 3891 3892 ps._8PixelDispatchEnable = prog_data->dispatch_8; 3893 ps._16PixelDispatchEnable = prog_data->dispatch_16; 3894 ps.DispatchGRFStartRegisterForConstantSetupData0 = 3895 prog_data->base.dispatch_grf_start_reg; 3896 ps.DispatchGRFStartRegisterForConstantSetupData2 = 3897 prog_data->dispatch_grf_start_reg_2; 3898 3899 ps.KernelStartPointer0 = stage_state->prog_offset; 3900 ps.KernelStartPointer2 = stage_state->prog_offset + 3901 prog_data->prog_offset_2; 3902 3903 if (prog_data->base.total_scratch) { 3904 ps.ScratchSpaceBasePointer = 3905 rw_bo(stage_state->scratch_bo, 3906 ffs(stage_state->per_thread_scratch) - 11); 3907 } 3908 } 3909 } 3910 3911 static const struct brw_tracked_state genX(ps_state) = { 3912 .dirty = { 3913 .mesa = _NEW_MULTISAMPLE | 3914 (GEN_GEN < 8 ? 
                              _NEW_BUFFERS |
                              _NEW_COLOR
                            : 0),
      .brw   = BRW_NEW_BATCH |
               BRW_NEW_BLORP |
               BRW_NEW_FS_PROG_DATA,
   },
   .emit = genX(upload_ps),
};
#endif

/* ---------------------------------------------------------------------- */

#if GEN_GEN >= 7
/**
 * Emit 3DSTATE_HS for the tessellation control (hull) shader stage.
 *
 * Without TCS program data the packet is emitted with no fields set by this
 * function (presumably leaving the stage disabled -- confirm against the
 * genxml defaults).  Otherwise the common thread-dispatch fields come from
 * INIT_THREAD_DISPATCH_FIELDS and the HS-specific fields are set explicitly.
 */
static void
genX(upload_hs_state)(struct brw_context *brw)
{
   /* NOTE(review): stage_state, stage_prog_data and vue_prog_data are not all
    * referenced directly below; INIT_THREAD_DISPATCH_FIELDS appears to pick
    * them up by name, so check the macro before renaming or removing them.
    */
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct brw_stage_state *stage_state = &brw->tcs.base;
   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
   const struct brw_vue_prog_data *vue_prog_data =
      brw_vue_prog_data(stage_prog_data);

   /* BRW_NEW_TCS_PROG_DATA */
   struct brw_tcs_prog_data *tcs_prog_data =
      brw_tcs_prog_data(stage_prog_data);

   if (!tcs_prog_data) {
      brw_batch_emit(brw, GENX(3DSTATE_HS), hs);
   } else {
      brw_batch_emit(brw, GENX(3DSTATE_HS), hs) {
         INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);

         /* Both fields below are programmed as "value minus one". */
         hs.InstanceCount = tcs_prog_data->instances - 1;
         hs.IncludeVertexHandles = true;

         hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
      }
   }
}

/* State atom: re-emit 3DSTATE_HS when any of the listed bits is dirty. */
static const struct brw_tracked_state genX(hs_state) = {
   .dirty = {
      .mesa  = 0,
      .brw   = BRW_NEW_BATCH |
               BRW_NEW_BLORP |
               BRW_NEW_TCS_PROG_DATA |
               BRW_NEW_TESS_PROGRAMS,
   },
   .emit = genX(upload_hs_state),
};

/**
 * Emit 3DSTATE_DS for the tessellation evaluation (domain) shader stage.
 *
 * Without TES program data the packet is emitted with no fields set by this
 * function.  Otherwise the common thread-dispatch fields come from
 * INIT_THREAD_DISPATCH_FIELDS and the DS-specific fields are set explicitly.
 */
static void
genX(upload_ds_state)(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_stage_state *stage_state = &brw->tes.base;
   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;

   /* BRW_NEW_TES_PROG_DATA */
   const struct brw_tes_prog_data *tes_prog_data =
      brw_tes_prog_data(stage_prog_data);
   const struct brw_vue_prog_data *vue_prog_data =
      brw_vue_prog_data(stage_prog_data);

   if (!tes_prog_data) {
      brw_batch_emit(brw, GENX(3DSTATE_DS), ds);
   } else {
      brw_batch_emit(brw, GENX(3DSTATE_DS), ds) {
         INIT_THREAD_DISPATCH_FIELDS(ds, Patch);

         ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
         /* The W coordinate is only requested for the triangle domain. */
         ds.ComputeWCoordinateEnable =
            tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;

#if GEN_GEN >= 8
         /* DispatchMode is only overridden for SIMD8 programs; any other
          * mode keeps whatever value the packet already holds.
          */
         if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
            ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
         ds.UserClipDistanceCullTestEnableBitmask =
            vue_prog_data->cull_distance_mask;
#endif
      }
   }
}

/* State atom: re-emit 3DSTATE_DS when any of the listed bits is dirty. */
static const struct brw_tracked_state genX(ds_state) = {
   .dirty = {
      .mesa  = 0,
      .brw   = BRW_NEW_BATCH |
               BRW_NEW_BLORP |
               BRW_NEW_TESS_PROGRAMS |
               BRW_NEW_TES_PROG_DATA,
   },
   .emit = genX(upload_ds_state),
};

/* ---------------------------------------------------------------------- */

/**
 * Emit 3DSTATE_TE (tessellation engine state).
 *
 * When a tessellation evaluation program is bound, the tessellator is
 * enabled and its partitioning, output topology and domain are taken from
 * the TES program data; the maximum tessellation factors are fixed at 63
 * (odd) and 64 (not odd).  Otherwise the packet is emitted with no fields
 * set by this function.
 */
static void
upload_te_state(struct brw_context *brw)
{
   /* BRW_NEW_TESS_PROGRAMS */
   bool active = brw->programs[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_TES_PROG_DATA */
   const struct brw_tes_prog_data *tes_prog_data =
      brw_tes_prog_data(brw->tes.base.prog_data);

   if (active) {
      brw_batch_emit(brw, GENX(3DSTATE_TE), te) {
         te.Partitioning = tes_prog_data->partitioning;
         te.OutputTopology = tes_prog_data->output_topology;
         te.TEDomain = tes_prog_data->domain;
         te.TEEnable = true;
         te.MaximumTessellationFactorOdd = 63.0;
         te.MaximumTessellationFactorNotOdd = 64.0;
      }
   } else {
      brw_batch_emit(brw, GENX(3DSTATE_TE), te);
   }
}

/* State atom for 3DSTATE_TE.
 *
 * NOTE(review): unlike hs_state and ds_state this atom listens for
 * BRW_NEW_CONTEXT rather than BRW_NEW_BATCH -- presumably intentional;
 * confirm before changing.
 */
static const struct brw_tracked_state genX(te_state) = {
   .dirty = {
      .mesa  = 0,
      .brw   = BRW_NEW_BLORP |
               BRW_NEW_CONTEXT |
               BRW_NEW_TES_PROG_DATA |
               BRW_NEW_TESS_PROGRAMS,
   },
   .emit = upload_te_state,
};

/* ---------------------------------------------------------------------- */

static void
genX(upload_tes_push_constants)(struct brw_context *brw)
{
   /* Upload push constants for the tessellation evaluation stage by handing
    * the bound TES program, its program data and the stage state to
    * gen6_upload_push_constants().
    */
   struct brw_stage_state *stage_state = &brw->tes.base;
   /* BRW_NEW_TESS_PROGRAMS */
   const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_TES_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
   gen6_upload_push_constants(brw, tep, prog_data, stage_state);
}

/* State atom: re-upload TES push constants when any listed bit is dirty. */
static const struct brw_tracked_state genX(tes_push_constants) = {
   .dirty = {
      .mesa  = _NEW_PROGRAM_CONSTANTS,
      .brw   = BRW_NEW_BATCH |
               BRW_NEW_BLORP |
               BRW_NEW_TESS_PROGRAMS |
               BRW_NEW_TES_PROG_DATA,
   },
   .emit = genX(upload_tes_push_constants),
};

/**
 * Upload push constants for the tessellation control stage by handing the
 * bound TCS program, its program data and the stage state to
 * gen6_upload_push_constants().
 */
static void
genX(upload_tcs_push_constants)(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->tcs.base;
   /* BRW_NEW_TESS_PROGRAMS */
   const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL];

   /* BRW_NEW_TCS_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;

   gen6_upload_push_constants(brw, tcp, prog_data, stage_state);
}

/* State atom: re-upload TCS push constants when any listed bit is dirty.
 * Compared with the TES atom this one additionally listens for
 * BRW_NEW_DEFAULT_TESS_LEVELS.
 */
static const struct brw_tracked_state genX(tcs_push_constants) = {
   .dirty = {
      .mesa  = _NEW_PROGRAM_CONSTANTS,
      .brw   = BRW_NEW_BATCH |
               BRW_NEW_BLORP |
               BRW_NEW_DEFAULT_TESS_LEVELS |
               BRW_NEW_TESS_PROGRAMS |
               BRW_NEW_TCS_PROG_DATA,
   },
   .emit = genX(upload_tcs_push_constants),
};

#endif

/* ---------------------------------------------------------------------- */

#if GEN_GEN >= 7
/**
 * Upload push constants for the compute stage.
 *
 * Does nothing when no compute program is bound.  Otherwise the subroutine
 * indices are written first and the upload is delegated to
 * brw_upload_cs_push_constants().
 */
static void
genX(upload_cs_push_constants)(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->cs.base;

   /* BRW_NEW_COMPUTE_PROGRAM */
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA */
      struct brw_cs_prog_data *cs_prog_data =
         brw_cs_prog_data(brw->cs.base.prog_data);

      _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
      brw_upload_cs_push_constants(brw, cp, cs_prog_data, stage_state);
   }
}

/* State atom (non-static, so visible outside this file): re-upload CS push
 * constants when any listed bit is dirty.
 */
const struct brw_tracked_state genX(cs_push_constants) = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = genX(upload_cs_push_constants),
};

/**
 * Creates a new CS constant buffer reflecting the current CS program's
 * constants, if needed by the CS program.
 *
 * NOTE(review): unlike genX(upload_cs_push_constants), this function does
 * not check whether a compute program is bound before forming
 * &cp->program -- presumably callers only run this atom with a compute
 * program bound; confirm.
 */
static void
genX(upload_cs_pull_constants)(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->cs.base;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct brw_program *cp =
      (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = brw->cs.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program,
                             stage_state, prog_data);
}

/* State atom (non-static): re-upload CS pull constants when any listed bit
 * is dirty.
 */
const struct brw_tracked_state genX(cs_pull_constants) = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = genX(upload_cs_pull_constants),
};

/**
 * Emit the compute pipeline state.
 *
 * Returns immediately when there is no CS program data.  Otherwise, in
 * order: allocates space for the interface descriptor and the binding table
 * in the state batch, emits a CS-stall PIPE_CONTROL, then MEDIA_VFE_STATE,
 * MEDIA_CURBE_LOAD (only when there are push constants), packs the
 * INTERFACE_DESCRIPTOR_DATA and finally emits
 * MEDIA_INTERFACE_DESCRIPTOR_LOAD pointing at it.
 */
static void
genX(upload_cs_state)(struct brw_context *brw)
{
   if (!brw->cs.base.prog_data)
      return;

   /* Space for one interface descriptor; "offset" receives its location and
    * is handed to MEDIA_INTERFACE_DESCRIPTOR_LOAD at the end.
    */
   uint32_t offset;
   uint32_t *desc = (uint32_t*) brw_state_batch(
      brw, GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t), 64,
      &offset);

   struct brw_stage_state *stage_state = &brw->cs.base;
   struct brw_stage_prog_data *prog_data = stage_state->prog_data;
   struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* With shader-time debugging enabled, add a writable raw buffer surface
    * for the shader-time BO at its binding table slot.
    */
   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      brw_emit_buffer_surface_state(
         brw, &stage_state->surf_offset[
                 prog_data->binding_table.shader_time_start],
         brw->shader_time.bo, 0, ISL_FORMAT_RAW,
         brw->shader_time.bo->size, 1,
         RELOC_WRITE);
   }

   /* Binding table storage; it is filled from surf_offset further down. */
   uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes,
                                    32, &stage_state->bind_bo_offset);

   /* The MEDIA_VFE_STATE documentation for Gen8+ says:
    *
    * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
    *  the only bits that are changed are scoreboard related: Scoreboard
    *  Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
    *  these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
    *
    * Earlier generations say "MI_FLUSH" instead of "stalling PIPE_CONTROL",
    * but MI_FLUSH isn't really a thing, so we assume they meant PIPE_CONTROL.
    */
   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);

   brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
      if (prog_data->total_scratch) {
         uint32_t per_thread_scratch_value;

         if (GEN_GEN >= 8) {
            /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
             * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
             */
            per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 11;
         } else if (GEN_IS_HASWELL) {
            /* Haswell's Per Thread Scratch Space is in the range [0, 10]
             * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
             */
            per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 12;
         } else {
            /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
             * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
             */
            per_thread_scratch_value = stage_state->per_thread_scratch / 1024 - 1;
         }
         vfe.ScratchSpaceBasePointer = rw_bo(stage_state->scratch_bo, 0);
         vfe.PerThreadScratchSpace = per_thread_scratch_value;
      }

      /* If brw->screen->subslice_total is greater than one, then
       * devinfo->max_cs_threads stores the number of threads per sub-slice;
       * thus we need to multiply that number by the subslice count to get
       * the actual maximum number of threads; the -1 is because the HW
       * has a bias of 1 (would not make sense to say the maximum number
       * of threads is 0).
       */
      const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
      vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
      vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
      vfe.ResetGatewayTimer =
         Resettingrelativetimerandlatchingtheglobaltimestamp;
#if GEN_GEN < 9
      vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol;
#endif
#if GEN_GEN == 7
      vfe.GPGPUMode = 1;
#endif

      /* We are uploading duplicated copies of push constant uniforms for each
       * thread. Although the local id data needs to vary per thread, it won't
       * change for other uniform data. Unfortunately this duplication is
       * required for gen7. As of Haswell, this duplication can be avoided,
       * but this older mechanism with duplicated data continues to work.
       *
       * FINISHME: As of Haswell, we could make use of the
       * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length"
       * field to only store one copy of uniform data.
       *
       * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
       * which is described in the GPGPU_WALKER command and in the Broadwell
       * PRM Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
       * Operations => GPGPU Mode => Indirect Payload Storage.
       *
       * Note: The constant data itself is built by
       * brw_upload_cs_push_constants(), which is called from
       * genX(upload_cs_push_constants), not from this function.
       */
      vfe.URBEntryAllocationSize = GEN_GEN >= 8 ? 2 : 0;

      /* Per-thread registers for every thread plus the cross-thread
       * registers, rounded up to a multiple of two.
       */
      const uint32_t vfe_curbe_allocation =
         ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
               cs_prog_data->push.cross_thread.regs, 2);
      vfe.CURBEAllocationSize = vfe_curbe_allocation;
   }

   /* Only load CURBE data when the program actually has push constants. */
   if (cs_prog_data->push.total.size > 0) {
      brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) {
         curbe.CURBETotalDataLength =
            ALIGN(cs_prog_data->push.total.size, 64);
         curbe.CURBEDataStartAddress = stage_state->push_const_offset;
      }
   }

   /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
   memcpy(bind, stage_state->surf_offset,
          prog_data->binding_table.size_bytes);
   const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
      .KernelStartPointer = brw->cs.base.prog_offset,
      .SamplerStatePointer = stage_state->sampler_offset,
      /* Sampler count is clamped to 16 and expressed in groups of four. */
      .SamplerCount = DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4),
      .BindingTablePointer = stage_state->bind_bo_offset,
      .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
      .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
      .SharedLocalMemorySize = encode_slm_size(GEN_GEN,
                                               prog_data->total_shared),
      .BarrierEnable = cs_prog_data->uses_barrier,
#if GEN_GEN >= 8 || GEN_IS_HASWELL
      .CrossThreadConstantDataReadLength =
         cs_prog_data->push.cross_thread.regs,
#endif
   };

   GENX(INTERFACE_DESCRIPTOR_DATA_pack)(brw, desc, &idd);

   brw_batch_emit(brw, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
      load.InterfaceDescriptorTotalLength =
         GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
      load.InterfaceDescriptorDataStartAddress = offset;
   }
}

/* State atom: re-emit the compute state when any listed bit is dirty. */
static const struct brw_tracked_state genX(cs_state) = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_SAMPLER_STATE_TABLE |
             BRW_NEW_SURFACES,
   },
   .emit = genX(upload_cs_state)
};

#endif

/* ---------------------------------------------------------------------- */

#if GEN_GEN >= 8
/**
 * Emit 3DSTATE_RASTER from the current GL polygon, point, line, scissor,
 * transform and multisample state.
 */
static void
genX(upload_raster)(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS */
   const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);

   /* _NEW_POLYGON */
   const struct gl_polygon_attrib *polygon = &ctx->Polygon;

   /* _NEW_POINT */
   const struct gl_point_attrib *point = &ctx->Point;

   brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
      /* Front winding is switched to counter-clockwise only when the
       * front-face bit and "rendering to a user FBO" agree; otherwise the
       * field keeps the value the packet starts with.
       */
      if (brw->polygon_front_bit == render_to_fbo)
         raster.FrontWinding = CounterClockwise;

      if (polygon->CullFlag) {
         switch (polygon->CullFaceMode) {
         case GL_FRONT:
            raster.CullMode = CULLMODE_FRONT;
            break;
         case GL_BACK:
            raster.CullMode = CULLMODE_BACK;
            break;
         case GL_FRONT_AND_BACK:
            raster.CullMode = CULLMODE_BOTH;
            break;
         default:
            unreachable("not reached");
         }
      } else {
         raster.CullMode = CULLMODE_NONE;
      }

      raster.SmoothPointEnable = point->SmoothFlag;

      raster.DXMultisampleRasterizationEnable =
         _mesa_is_multisample_enabled(ctx);

      raster.GlobalDepthOffsetEnableSolid = polygon->OffsetFill;
      raster.GlobalDepthOffsetEnableWireframe = polygon->OffsetLine;
      raster.GlobalDepthOffsetEnablePoint = polygon->OffsetPoint;

      switch (polygon->FrontMode) {
      case GL_FILL:
         raster.FrontFaceFillMode = FILL_MODE_SOLID;
         break;
      case GL_LINE:
         raster.FrontFaceFillMode = FILL_MODE_WIREFRAME;
         break;
      case GL_POINT:
         raster.FrontFaceFillMode = FILL_MODE_POINT;
         break;
      default:
         unreachable("not reached");
      }

      switch (polygon->BackMode) {
      case GL_FILL:
         raster.BackFaceFillMode = FILL_MODE_SOLID;
         break;
      case GL_LINE:
         raster.BackFaceFillMode = FILL_MODE_WIREFRAME;
         break;
      case GL_POINT:
         raster.BackFaceFillMode = FILL_MODE_POINT;
         break;
      default:
         unreachable("not reached");
      }

      /* _NEW_LINE */
      raster.AntialiasingEnable = ctx->Line.SmoothFlag;

#if GEN_GEN == 10
      /* _NEW_BUFFERS
       * Antialiasing Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
       */
      const bool multisampled_fbo =
         _mesa_geometric_samples(ctx->DrawBuffer) > 1;
      if (multisampled_fbo)
         raster.AntialiasingEnable = false;
#endif

      /* _NEW_SCISSOR */
      raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;

      /* _NEW_TRANSFORM
       * Z clip testing is only turned on when depth clamping is off.
       */
      if (!ctx->Transform.DepthClamp) {
#if GEN_GEN >= 9
         raster.ViewportZFarClipTestEnable = true;
         raster.ViewportZNearClipTestEnable = true;
#else
         raster.ViewportZClipTestEnable = true;
#endif
      }

      /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
#if GEN_GEN >= 9
      raster.ConservativeRasterizationEnable =
         ctx->IntelConservativeRasterization;
#endif

      raster.GlobalDepthOffsetClamp = polygon->OffsetClamp;
      raster.GlobalDepthOffsetScale = polygon->OffsetFactor;

      /* NOTE(review): the factor of two is not explained in this file;
       * presumably it converts GL offset units to hardware units -- confirm.
       */
      raster.GlobalDepthOffsetConstant = polygon->OffsetUnits * 2;
   }
}

/* State atom: re-emit 3DSTATE_RASTER when any listed bit is dirty. */
static const struct brw_tracked_state genX(raster_state) = {
   .dirty = {
      .mesa  = _NEW_BUFFERS |
               _NEW_LINE |
               _NEW_MULTISAMPLE |
               _NEW_POINT |
               _NEW_POLYGON |
               _NEW_SCISSOR |
               _NEW_TRANSFORM,
      .brw   = BRW_NEW_BLORP |
               BRW_NEW_CONTEXT |
               BRW_NEW_CONSERVATIVE_RASTERIZATION,
   },
   .emit = genX(upload_raster),
};
#endif

/* ---------------------------------------------------------------------- */

#if GEN_GEN >= 8
/**
 * Emit 3DSTATE_PS_EXTRA, describing to the hardware which features the
 * current fragment program uses (depth output, kill, varyings, source
 * depth/W, per-sample dispatch, coverage mask, oMask, UAV access).
 */
static void
genX(upload_ps_extra)(struct brw_context *brw)
{
   /* ctx is only read on Gen9+ (conservative rasterization), hence UNUSED. */
   UNUSED struct gl_context *ctx = &brw->ctx;

   const struct brw_wm_prog_data *prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   brw_batch_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
      psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
      psx.PixelShaderKillsPixel = prog_data->uses_kill;
      psx.AttributeEnable = prog_data->num_varying_inputs != 0;
      psx.PixelShaderUsesSourceDepth = prog_data->uses_src_depth;
      psx.PixelShaderUsesSourceW = prog_data->uses_src_w;
      psx.PixelShaderIsPerSample = prog_data->persample_dispatch;

      /* BRW_NEW_FS_PROG_DATA | BRW_NEW_CONSERVATIVE_RASTERIZATION
       *
       * (This note used to list _NEW_MULTISAMPLE, but nothing below reads
       * multisample state and the ps_extra atom does not listen for it.)
       */
      if (prog_data->uses_sample_mask) {
#if GEN_GEN >= 9
         if (prog_data->post_depth_coverage)
            psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
         else if (prog_data->inner_coverage && ctx->IntelConservativeRasterization)
            psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
         else
            psx.InputCoverageMaskState = ICMS_NORMAL;
#else
         psx.PixelShaderUsesInputCoverageMask = true;
#endif
      }

      psx.oMaskPresenttoRenderTarget = prog_data->uses_omask;
#if GEN_GEN >= 9
      psx.PixelShaderPullsBary = prog_data->pulls_bary;
      psx.PixelShaderComputesStencil = prog_data->computed_stencil;
#endif

      /* The stricter cross-primitive coherency guarantees that the hardware
       * gives us with the "Accesses UAV" bit set for at least one shader stage
       * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
       * are redundant within the current image, atomic counter and SSBO GL
       * APIs, which all have very loose ordering and coherency requirements
       * and generally rely on the application to insert explicit barriers when
       * a shader invocation is expected to see the memory writes performed by
       * the invocations of some previous primitive.  Regardless of the value
       * of "UAV coherency required", the "Accesses UAV" bits will implicitly
       * cause an in most cases useless DC flush when the lowermost stage with
       * the bit set finishes execution.
       *
       * It would be nice to disable it, but in some cases we can't because on
       * Gen8+ it also has an influence on rasterization via the PS UAV-only
       * signal (which could be set independently from the coherency mechanism
       * in the 3DSTATE_WM command on Gen7), and because in some cases it will
       * determine whether the hardware skips execution of the fragment shader
       * or not via the ThreadDispatchEnable signal.  However if we know that
       * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
       * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
       * difference so we may just disable it here.
       *
       * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
       * take into account KillPixels when no depth or stencil writes are
       * enabled.  In order for occlusion queries to work correctly with no
       * attachments, we need to force-enable here.
       *
       * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS |
       * _NEW_COLOR
       */
      if ((prog_data->has_side_effects || prog_data->uses_kill) &&
          !brw_color_buffer_write_enabled(brw))
         psx.PixelShaderHasUAV = true;
   }
}

/* State atom (non-static): re-emit 3DSTATE_PS_EXTRA when any listed bit is
 * dirty.
 */
const struct brw_tracked_state genX(ps_extra) = {
   .dirty = {
      .mesa  = _NEW_BUFFERS | _NEW_COLOR,
      .brw   = BRW_NEW_BLORP |
               BRW_NEW_CONTEXT |
               BRW_NEW_FRAGMENT_PROGRAM |
               BRW_NEW_FS_PROG_DATA |
               BRW_NEW_CONSERVATIVE_RASTERIZATION,
   },
   .emit = genX(upload_ps_extra),
};
#endif

/* ---------------------------------------------------------------------- */

#if GEN_GEN >= 8
/**
 * Emit 3DSTATE_PS_BLEND, which summarises the blend state of draw buffer
 * zero: whether any render target is writeable, alpha test / alpha to
 * coverage, and the (possibly adjusted) blend factors of buffer 0.
 */
static void
genX(upload_ps_blend)(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS */
   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
   const bool buffer0_is_integer = ctx->DrawBuffer->_IntegerBuffers & 0x1;

   /* _NEW_COLOR */
   struct gl_colorbuffer_attrib *color = &ctx->Color;

   brw_batch_emit(brw, GENX(3DSTATE_PS_BLEND), pb) {
      /* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */
      pb.HasWriteableRT = brw_color_buffer_write_enabled(brw);

      bool alpha_to_one = false;

      if (!buffer0_is_integer) {
         /* _NEW_MULTISAMPLE */

         if (_mesa_is_multisample_enabled(ctx)) {
            pb.AlphaToCoverageEnable = ctx->Multisample.SampleAlphaToCoverage;
            alpha_to_one = ctx->Multisample.SampleAlphaToOne;
         }

         pb.AlphaTestEnable = color->AlphaEnabled;
      }

      /* Used for implementing the following bit of GL_EXT_texture_integer:
       * "Per-fragment operations that require floating-point color
       *  components, including multisample alpha operations, alpha test,
       *  blending, and dithering, have no effect when the corresponding
       *  colors are written to an integer color buffer."
       *
       * The OpenGL specification 3.3 (page 196), section 4.1.3 says:
       * "If drawbuffer zero is not NONE and the buffer it references has an
       *  integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE
       *  operations are skipped."
       */
      if (rb && !buffer0_is_integer && (color->BlendEnabled & 1)) {
         GLenum eqRGB = color->Blend[0].EquationRGB;
         GLenum eqA = color->Blend[0].EquationA;
         GLenum srcRGB = color->Blend[0].SrcRGB;
         GLenum dstRGB = color->Blend[0].DstRGB;
         GLenum srcA = color->Blend[0].SrcA;
         GLenum dstA = color->Blend[0].DstA;

         /* For MIN/MAX equations both factors are forced to GL_ONE. */
         if (eqRGB == GL_MIN || eqRGB == GL_MAX)
            srcRGB = dstRGB = GL_ONE;

         if (eqA == GL_MIN || eqA == GL_MAX)
            srcA = dstA = GL_ONE;

         /* Due to hardware limitations, the destination may have information
          * in an alpha channel even when the format specifies no alpha
          * channel. In order to avoid getting any incorrect blending due to
          * that alpha channel, coerce the blend factors to values that will
          * not read the alpha channel, but will instead use the correct
          * implicit value for alpha.
          */
         if (!_mesa_base_format_has_channel(rb->_BaseFormat,
                                            GL_TEXTURE_ALPHA_TYPE)) {
            srcRGB = brw_fix_xRGB_alpha(srcRGB);
            srcA = brw_fix_xRGB_alpha(srcA);
            dstRGB = brw_fix_xRGB_alpha(dstRGB);
            dstA = brw_fix_xRGB_alpha(dstA);
         }

         /* Alpha to One doesn't work with Dual Color Blending.  Override
          * SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO.
          */
         if (alpha_to_one && color->Blend[0]._UsesDualSrc) {
            srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
            srcA = fix_dual_blend_alpha_to_one(srcA);
            dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
            dstA = fix_dual_blend_alpha_to_one(dstA);
         }

         pb.ColorBufferBlendEnable = true;
         pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA);
         pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA);
         pb.SourceBlendFactor = brw_translate_blend_factor(srcRGB);
         pb.DestinationBlendFactor = brw_translate_blend_factor(dstRGB);

         /* Independent alpha blending is needed as soon as the alpha
          * factors or equation differ from the RGB ones.
          */
         pb.IndependentAlphaBlendEnable =
            srcA != srcRGB || dstA != dstRGB || eqA != eqRGB;
      }
   }
}

/* State atom: re-emit 3DSTATE_PS_BLEND when any listed bit is dirty. */
static const struct brw_tracked_state genX(ps_blend) = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR |
              _NEW_MULTISAMPLE,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_CONTEXT |
             BRW_NEW_FRAGMENT_PROGRAM,
   },
   .emit = genX(upload_ps_blend)
};
#endif

/* ---------------------------------------------------------------------- */

#if GEN_GEN >= 8
/**
 * Emit 3DSTATE_VF_TOPOLOGY carrying the current primitive topology
 * (brw->primitive).
 */
static void
genX(emit_vf_topology)(struct brw_context *brw)
{
   brw_batch_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), vftopo) {
      vftopo.PrimitiveTopologyType = brw->primitive;
   }
}

/* State atom: re-emit 3DSTATE_VF_TOPOLOGY when the primitive changes. */
static const struct brw_tracked_state genX(vf_topology) = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_PRIMITIVE,
   },
   .emit = genX(emit_vf_topology),
};
#endif

/* ---------------------------------------------------------------------- */

#if GEN_GEN >= 7
/**
 * Emit MI_REPORT_PERF_COUNT.
 *
 * \param bo               buffer object that receives the report
 * \param offset_in_bytes  byte offset of the report within \p bo
 * \param report_id        value programmed into the packet's ReportID field
 */
static void
genX(emit_mi_report_perf_count)(struct brw_context *brw,
                                struct brw_bo *bo,
                                uint32_t offset_in_bytes,
                                uint32_t report_id)
{
   brw_batch_emit(brw, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {
      mi_rpc.MemoryAddress = ggtt_bo(bo, offset_in_bytes);
      mi_rpc.ReportID = report_id;
   }
}
#endif
4687 4688 /* ---------------------------------------------------------------------- */ 4689 4690 /** 4691 * Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet. 4692 */ 4693 static void 4694 genX(emit_sampler_state_pointers_xs)(struct brw_context *brw, 4695 struct brw_stage_state *stage_state) 4696 { 4697 #if GEN_GEN >= 7 4698 static const uint16_t packet_headers[] = { 4699 [MESA_SHADER_VERTEX] = 43, 4700 [MESA_SHADER_TESS_CTRL] = 44, 4701 [MESA_SHADER_TESS_EVAL] = 45, 4702 [MESA_SHADER_GEOMETRY] = 46, 4703 [MESA_SHADER_FRAGMENT] = 47, 4704 }; 4705 4706 /* Ivybridge requires a workaround flush before VS packets. */ 4707 if (GEN_GEN == 7 && !GEN_IS_HASWELL && 4708 stage_state->stage == MESA_SHADER_VERTEX) { 4709 gen7_emit_vs_workaround_flush(brw); 4710 } 4711 4712 brw_batch_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { 4713 ptr._3DCommandSubOpcode = packet_headers[stage_state->stage]; 4714 ptr.PointertoVSSamplerState = stage_state->sampler_offset; 4715 } 4716 #endif 4717 } 4718 4719 UNUSED static bool 4720 has_component(mesa_format format, int i) 4721 { 4722 if (_mesa_is_format_color_format(format)) 4723 return _mesa_format_has_color_component(format, i); 4724 4725 /* depth and stencil have only one component */ 4726 return i == 0; 4727 } 4728 4729 /** 4730 * Upload SAMPLER_BORDER_COLOR_STATE. 4731 */ 4732 static void 4733 genX(upload_default_color)(struct brw_context *brw, 4734 const struct gl_sampler_object *sampler, 4735 mesa_format format, GLenum base_format, 4736 bool is_integer_format, bool is_stencil_sampling, 4737 uint32_t *sdc_offset) 4738 { 4739 union gl_color_union color; 4740 4741 switch (base_format) { 4742 case GL_DEPTH_COMPONENT: 4743 /* GL specs that border color for depth textures is taken from the 4744 * R channel, while the hardware uses A. Spam R into all the 4745 * channels for safety. 
4746 */ 4747 color.ui[0] = sampler->BorderColor.ui[0]; 4748 color.ui[1] = sampler->BorderColor.ui[0]; 4749 color.ui[2] = sampler->BorderColor.ui[0]; 4750 color.ui[3] = sampler->BorderColor.ui[0]; 4751 break; 4752 case GL_ALPHA: 4753 color.ui[0] = 0u; 4754 color.ui[1] = 0u; 4755 color.ui[2] = 0u; 4756 color.ui[3] = sampler->BorderColor.ui[3]; 4757 break; 4758 case GL_INTENSITY: 4759 color.ui[0] = sampler->BorderColor.ui[0]; 4760 color.ui[1] = sampler->BorderColor.ui[0]; 4761 color.ui[2] = sampler->BorderColor.ui[0]; 4762 color.ui[3] = sampler->BorderColor.ui[0]; 4763 break; 4764 case GL_LUMINANCE: 4765 color.ui[0] = sampler->BorderColor.ui[0]; 4766 color.ui[1] = sampler->BorderColor.ui[0]; 4767 color.ui[2] = sampler->BorderColor.ui[0]; 4768 color.ui[3] = float_as_int(1.0); 4769 break; 4770 case GL_LUMINANCE_ALPHA: 4771 color.ui[0] = sampler->BorderColor.ui[0]; 4772 color.ui[1] = sampler->BorderColor.ui[0]; 4773 color.ui[2] = sampler->BorderColor.ui[0]; 4774 color.ui[3] = sampler->BorderColor.ui[3]; 4775 break; 4776 default: 4777 color.ui[0] = sampler->BorderColor.ui[0]; 4778 color.ui[1] = sampler->BorderColor.ui[1]; 4779 color.ui[2] = sampler->BorderColor.ui[2]; 4780 color.ui[3] = sampler->BorderColor.ui[3]; 4781 break; 4782 } 4783 4784 /* In some cases we use an RGBA surface format for GL RGB textures, 4785 * where we've initialized the A channel to 1.0. We also have to set 4786 * the border color alpha to 1.0 in that case. 
4787 */ 4788 if (base_format == GL_RGB) 4789 color.ui[3] = float_as_int(1.0); 4790 4791 int alignment = 32; 4792 if (GEN_GEN >= 8) { 4793 alignment = 64; 4794 } else if (GEN_IS_HASWELL && (is_integer_format || is_stencil_sampling)) { 4795 alignment = 512; 4796 } 4797 4798 uint32_t *sdc = brw_state_batch( 4799 brw, GENX(SAMPLER_BORDER_COLOR_STATE_length) * sizeof(uint32_t), 4800 alignment, sdc_offset); 4801 4802 struct GENX(SAMPLER_BORDER_COLOR_STATE) state = { 0 }; 4803 4804 #define ASSIGN(dst, src) \ 4805 do { \ 4806 dst = src; \ 4807 } while (0) 4808 4809 #define ASSIGNu16(dst, src) \ 4810 do { \ 4811 dst = (uint16_t)src; \ 4812 } while (0) 4813 4814 #define ASSIGNu8(dst, src) \ 4815 do { \ 4816 dst = (uint8_t)src; \ 4817 } while (0) 4818 4819 #define BORDER_COLOR_ATTR(macro, _color_type, src) \ 4820 macro(state.BorderColor ## _color_type ## Red, src[0]); \ 4821 macro(state.BorderColor ## _color_type ## Green, src[1]); \ 4822 macro(state.BorderColor ## _color_type ## Blue, src[2]); \ 4823 macro(state.BorderColor ## _color_type ## Alpha, src[3]); 4824 4825 #if GEN_GEN >= 8 4826 /* On Broadwell, the border color is represented as four 32-bit floats, 4827 * integers, or unsigned values, interpreted according to the surface 4828 * format. This matches the sampler->BorderColor union exactly; just 4829 * memcpy the values. 4830 */ 4831 BORDER_COLOR_ATTR(ASSIGN, 32bit, color.ui); 4832 #elif GEN_IS_HASWELL 4833 if (is_integer_format || is_stencil_sampling) { 4834 bool stencil = format == MESA_FORMAT_S_UINT8 || is_stencil_sampling; 4835 const int bits_per_channel = 4836 _mesa_get_format_bits(format, stencil ? GL_STENCIL_BITS : GL_RED_BITS); 4837 4838 /* From the Haswell PRM, "Command Reference: Structures", Page 36: 4839 * "If any color channel is missing from the surface format, 4840 * corresponding border color should be programmed as zero and if 4841 * alpha channel is missing, corresponding Alpha border color should 4842 * be programmed as 1." 
4843 */ 4844 unsigned c[4] = { 0, 0, 0, 1 }; 4845 for (int i = 0; i < 4; i++) { 4846 if (has_component(format, i)) 4847 c[i] = color.ui[i]; 4848 } 4849 4850 switch (bits_per_channel) { 4851 case 8: 4852 /* Copy RGBA in order. */ 4853 BORDER_COLOR_ATTR(ASSIGNu8, 8bit, c); 4854 break; 4855 case 10: 4856 /* R10G10B10A2_UINT is treated like a 16-bit format. */ 4857 case 16: 4858 BORDER_COLOR_ATTR(ASSIGNu16, 16bit, c); 4859 break; 4860 case 32: 4861 if (base_format == GL_RG) { 4862 /* Careful inspection of the tables reveals that for RG32 formats, 4863 * the green channel needs to go where blue normally belongs. 4864 */ 4865 state.BorderColor32bitRed = c[0]; 4866 state.BorderColor32bitBlue = c[1]; 4867 state.BorderColor32bitAlpha = 1; 4868 } else { 4869 /* Copy RGBA in order. */ 4870 BORDER_COLOR_ATTR(ASSIGN, 32bit, c); 4871 } 4872 break; 4873 default: 4874 assert(!"Invalid number of bits per channel in integer format."); 4875 break; 4876 } 4877 } else { 4878 BORDER_COLOR_ATTR(ASSIGN, Float, color.f); 4879 } 4880 #elif GEN_GEN == 5 || GEN_GEN == 6 4881 BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_UBYTE, Unorm, color.f); 4882 BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_USHORT, Unorm16, color.f); 4883 BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_SHORT, Snorm16, color.f); 4884 4885 #define MESA_FLOAT_TO_HALF(dst, src) \ 4886 dst = _mesa_float_to_half(src); 4887 4888 BORDER_COLOR_ATTR(MESA_FLOAT_TO_HALF, Float16, color.f); 4889 4890 #undef MESA_FLOAT_TO_HALF 4891 4892 state.BorderColorSnorm8Red = state.BorderColorSnorm16Red >> 8; 4893 state.BorderColorSnorm8Green = state.BorderColorSnorm16Green >> 8; 4894 state.BorderColorSnorm8Blue = state.BorderColorSnorm16Blue >> 8; 4895 state.BorderColorSnorm8Alpha = state.BorderColorSnorm16Alpha >> 8; 4896 4897 BORDER_COLOR_ATTR(ASSIGN, Float, color.f); 4898 #elif GEN_GEN == 4 4899 BORDER_COLOR_ATTR(ASSIGN, , color.f); 4900 #else 4901 BORDER_COLOR_ATTR(ASSIGN, Float, color.f); 4902 #endif 4903 4904 #undef ASSIGN 4905 #undef BORDER_COLOR_ATTR 4906 4907 
GENX(SAMPLER_BORDER_COLOR_STATE_pack)(brw, sdc, &state); 4908 } 4909 4910 static uint32_t 4911 translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest) 4912 { 4913 switch (wrap) { 4914 case GL_REPEAT: 4915 return TCM_WRAP; 4916 case GL_CLAMP: 4917 #if GEN_GEN >= 8 4918 /* GL_CLAMP is the weird mode where coordinates are clamped to 4919 * [0.0, 1.0], so linear filtering of coordinates outside of 4920 * [0.0, 1.0] give you half edge texel value and half border 4921 * color. 4922 * 4923 * Gen8+ supports this natively. 4924 */ 4925 return TCM_HALF_BORDER; 4926 #else 4927 /* On Gen4-7.5, we clamp the coordinates in the fragment shader 4928 * and set clamp_border here, which gets the result desired. 4929 * We just use clamp(_to_edge) for nearest, because for nearest 4930 * clamping to 1.0 gives border color instead of the desired 4931 * edge texels. 4932 */ 4933 if (using_nearest) 4934 return TCM_CLAMP; 4935 else 4936 return TCM_CLAMP_BORDER; 4937 #endif 4938 case GL_CLAMP_TO_EDGE: 4939 return TCM_CLAMP; 4940 case GL_CLAMP_TO_BORDER: 4941 return TCM_CLAMP_BORDER; 4942 case GL_MIRRORED_REPEAT: 4943 return TCM_MIRROR; 4944 case GL_MIRROR_CLAMP_TO_EDGE: 4945 return TCM_MIRROR_ONCE; 4946 default: 4947 return TCM_WRAP; 4948 } 4949 } 4950 4951 /** 4952 * Return true if the given wrap mode requires the border color to exist. 4953 */ 4954 static bool 4955 wrap_mode_needs_border_color(unsigned wrap_mode) 4956 { 4957 #if GEN_GEN >= 8 4958 return wrap_mode == TCM_CLAMP_BORDER || 4959 wrap_mode == TCM_HALF_BORDER; 4960 #else 4961 return wrap_mode == TCM_CLAMP_BORDER; 4962 #endif 4963 } 4964 4965 /** 4966 * Sets the sampler state for a single unit based off of the sampler key 4967 * entry. 
4968 */ 4969 static void 4970 genX(update_sampler_state)(struct brw_context *brw, 4971 GLenum target, bool tex_cube_map_seamless, 4972 GLfloat tex_unit_lod_bias, 4973 mesa_format format, GLenum base_format, 4974 const struct gl_texture_object *texObj, 4975 const struct gl_sampler_object *sampler, 4976 uint32_t *sampler_state, 4977 uint32_t batch_offset_for_sampler_state) 4978 { 4979 struct GENX(SAMPLER_STATE) samp_st = { 0 }; 4980 4981 /* Select min and mip filters. */ 4982 switch (sampler->MinFilter) { 4983 case GL_NEAREST: 4984 samp_st.MinModeFilter = MAPFILTER_NEAREST; 4985 samp_st.MipModeFilter = MIPFILTER_NONE; 4986 break; 4987 case GL_LINEAR: 4988 samp_st.MinModeFilter = MAPFILTER_LINEAR; 4989 samp_st.MipModeFilter = MIPFILTER_NONE; 4990 break; 4991 case GL_NEAREST_MIPMAP_NEAREST: 4992 samp_st.MinModeFilter = MAPFILTER_NEAREST; 4993 samp_st.MipModeFilter = MIPFILTER_NEAREST; 4994 break; 4995 case GL_LINEAR_MIPMAP_NEAREST: 4996 samp_st.MinModeFilter = MAPFILTER_LINEAR; 4997 samp_st.MipModeFilter = MIPFILTER_NEAREST; 4998 break; 4999 case GL_NEAREST_MIPMAP_LINEAR: 5000 samp_st.MinModeFilter = MAPFILTER_NEAREST; 5001 samp_st.MipModeFilter = MIPFILTER_LINEAR; 5002 break; 5003 case GL_LINEAR_MIPMAP_LINEAR: 5004 samp_st.MinModeFilter = MAPFILTER_LINEAR; 5005 samp_st.MipModeFilter = MIPFILTER_LINEAR; 5006 break; 5007 default: 5008 unreachable("not reached"); 5009 } 5010 5011 /* Select mag filter. */ 5012 samp_st.MagModeFilter = sampler->MagFilter == GL_LINEAR ? 5013 MAPFILTER_LINEAR : MAPFILTER_NEAREST; 5014 5015 /* Enable anisotropic filtering if desired. 
*/ 5016 samp_st.MaximumAnisotropy = RATIO21; 5017 5018 if (sampler->MaxAnisotropy > 1.0f) { 5019 if (samp_st.MinModeFilter == MAPFILTER_LINEAR) 5020 samp_st.MinModeFilter = MAPFILTER_ANISOTROPIC; 5021 if (samp_st.MagModeFilter == MAPFILTER_LINEAR) 5022 samp_st.MagModeFilter = MAPFILTER_ANISOTROPIC; 5023 5024 if (sampler->MaxAnisotropy > 2.0f) { 5025 samp_st.MaximumAnisotropy = 5026 MIN2((sampler->MaxAnisotropy - 2) / 2, RATIO161); 5027 } 5028 } 5029 5030 /* Set address rounding bits if not using nearest filtering. */ 5031 if (samp_st.MinModeFilter != MAPFILTER_NEAREST) { 5032 samp_st.UAddressMinFilterRoundingEnable = true; 5033 samp_st.VAddressMinFilterRoundingEnable = true; 5034 samp_st.RAddressMinFilterRoundingEnable = true; 5035 } 5036 5037 if (samp_st.MagModeFilter != MAPFILTER_NEAREST) { 5038 samp_st.UAddressMagFilterRoundingEnable = true; 5039 samp_st.VAddressMagFilterRoundingEnable = true; 5040 samp_st.RAddressMagFilterRoundingEnable = true; 5041 } 5042 5043 bool either_nearest = 5044 sampler->MinFilter == GL_NEAREST || sampler->MagFilter == GL_NEAREST; 5045 unsigned wrap_s = translate_wrap_mode(brw, sampler->WrapS, either_nearest); 5046 unsigned wrap_t = translate_wrap_mode(brw, sampler->WrapT, either_nearest); 5047 unsigned wrap_r = translate_wrap_mode(brw, sampler->WrapR, either_nearest); 5048 5049 if (target == GL_TEXTURE_CUBE_MAP || 5050 target == GL_TEXTURE_CUBE_MAP_ARRAY) { 5051 /* Cube maps must use the same wrap mode for all three coordinate 5052 * dimensions. Prior to Haswell, only CUBE and CLAMP are valid. 5053 * 5054 * Ivybridge and Baytrail seem to have problems with CUBE mode and 5055 * integer formats. Fall back to CLAMP for now. 
5056 */ 5057 if ((tex_cube_map_seamless || sampler->CubeMapSeamless) && 5058 !(GEN_GEN == 7 && !GEN_IS_HASWELL && texObj->_IsIntegerFormat)) { 5059 wrap_s = TCM_CUBE; 5060 wrap_t = TCM_CUBE; 5061 wrap_r = TCM_CUBE; 5062 } else { 5063 wrap_s = TCM_CLAMP; 5064 wrap_t = TCM_CLAMP; 5065 wrap_r = TCM_CLAMP; 5066 } 5067 } else if (target == GL_TEXTURE_1D) { 5068 /* There's a bug in 1D texture sampling - it actually pays 5069 * attention to the wrap_t value, though it should not. 5070 * Override the wrap_t value here to GL_REPEAT to keep 5071 * any nonexistent border pixels from floating in. 5072 */ 5073 wrap_t = TCM_WRAP; 5074 } 5075 5076 samp_st.TCXAddressControlMode = wrap_s; 5077 samp_st.TCYAddressControlMode = wrap_t; 5078 samp_st.TCZAddressControlMode = wrap_r; 5079 5080 samp_st.ShadowFunction = 5081 sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB ? 5082 intel_translate_shadow_compare_func(sampler->CompareFunc) : 0; 5083 5084 #if GEN_GEN >= 7 5085 /* Set shadow function. */ 5086 samp_st.AnisotropicAlgorithm = 5087 samp_st.MinModeFilter == MAPFILTER_ANISOTROPIC ? 5088 EWAApproximation : LEGACY; 5089 #endif 5090 5091 #if GEN_GEN >= 6 5092 samp_st.NonnormalizedCoordinateEnable = target == GL_TEXTURE_RECTANGLE; 5093 #endif 5094 5095 const float hw_max_lod = GEN_GEN >= 7 ? 14 : 13; 5096 samp_st.MinLOD = CLAMP(sampler->MinLod, 0, hw_max_lod); 5097 samp_st.MaxLOD = CLAMP(sampler->MaxLod, 0, hw_max_lod); 5098 samp_st.TextureLODBias = 5099 CLAMP(tex_unit_lod_bias + sampler->LodBias, -16, 15); 5100 5101 #if GEN_GEN == 6 5102 samp_st.BaseMipLevel = 5103 CLAMP(texObj->MinLevel + texObj->BaseLevel, 0, hw_max_lod); 5104 samp_st.MinandMagStateNotEqual = 5105 samp_st.MinModeFilter != samp_st.MagModeFilter; 5106 #endif 5107 5108 /* Upload the border color if necessary. If not, just point it at 5109 * offset 0 (the start of the batch) - the color should be ignored, 5110 * but that address won't fault in case something reads it anyway. 
5111 */ 5112 uint32_t border_color_offset = 0; 5113 if (wrap_mode_needs_border_color(wrap_s) || 5114 wrap_mode_needs_border_color(wrap_t) || 5115 wrap_mode_needs_border_color(wrap_r)) { 5116 genX(upload_default_color)(brw, sampler, format, base_format, 5117 texObj->_IsIntegerFormat, 5118 texObj->StencilSampling, 5119 &border_color_offset); 5120 } 5121 #if GEN_GEN < 6 5122 samp_st.BorderColorPointer = 5123 ro_bo(brw->batch.state.bo, border_color_offset); 5124 #else 5125 samp_st.BorderColorPointer = border_color_offset; 5126 #endif 5127 5128 #if GEN_GEN >= 8 5129 samp_st.LODPreClampMode = CLAMP_MODE_OGL; 5130 #else 5131 samp_st.LODPreClampEnable = true; 5132 #endif 5133 5134 GENX(SAMPLER_STATE_pack)(brw, sampler_state, &samp_st); 5135 } 5136 5137 static void 5138 update_sampler_state(struct brw_context *brw, 5139 int unit, 5140 uint32_t *sampler_state, 5141 uint32_t batch_offset_for_sampler_state) 5142 { 5143 struct gl_context *ctx = &brw->ctx; 5144 const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; 5145 const struct gl_texture_object *texObj = texUnit->_Current; 5146 const struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); 5147 5148 /* These don't use samplers at all. 
*/ 5149 if (texObj->Target == GL_TEXTURE_BUFFER) 5150 return; 5151 5152 struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel]; 5153 genX(update_sampler_state)(brw, texObj->Target, 5154 ctx->Texture.CubeMapSeamless, 5155 texUnit->LodBias, 5156 firstImage->TexFormat, firstImage->_BaseFormat, 5157 texObj, sampler, 5158 sampler_state, batch_offset_for_sampler_state); 5159 } 5160 5161 static void 5162 genX(upload_sampler_state_table)(struct brw_context *brw, 5163 struct gl_program *prog, 5164 struct brw_stage_state *stage_state) 5165 { 5166 struct gl_context *ctx = &brw->ctx; 5167 uint32_t sampler_count = stage_state->sampler_count; 5168 5169 GLbitfield SamplersUsed = prog->SamplersUsed; 5170 5171 if (sampler_count == 0) 5172 return; 5173 5174 /* SAMPLER_STATE is 4 DWords on all platforms. */ 5175 const int dwords = GENX(SAMPLER_STATE_length); 5176 const int size_in_bytes = dwords * sizeof(uint32_t); 5177 5178 uint32_t *sampler_state = brw_state_batch(brw, 5179 sampler_count * size_in_bytes, 5180 32, &stage_state->sampler_offset); 5181 /* memset(sampler_state, 0, sampler_count * size_in_bytes); */ 5182 5183 uint32_t batch_offset_for_sampler_state = stage_state->sampler_offset; 5184 5185 for (unsigned s = 0; s < sampler_count; s++) { 5186 if (SamplersUsed & (1 << s)) { 5187 const unsigned unit = prog->SamplerUnits[s]; 5188 if (ctx->Texture.Unit[unit]._Current) { 5189 update_sampler_state(brw, unit, sampler_state, 5190 batch_offset_for_sampler_state); 5191 } 5192 } 5193 5194 sampler_state += dwords; 5195 batch_offset_for_sampler_state += size_in_bytes; 5196 } 5197 5198 if (GEN_GEN >= 7 && stage_state->stage != MESA_SHADER_COMPUTE) { 5199 /* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_XS packet. */ 5200 genX(emit_sampler_state_pointers_xs)(brw, stage_state); 5201 } else { 5202 /* Flag that the sampler state table pointer has changed; later atoms 5203 * will handle it. 
5204 */ 5205 brw->ctx.NewDriverState |= BRW_NEW_SAMPLER_STATE_TABLE; 5206 } 5207 } 5208 5209 static void 5210 genX(upload_fs_samplers)(struct brw_context *brw) 5211 { 5212 /* BRW_NEW_FRAGMENT_PROGRAM */ 5213 struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT]; 5214 genX(upload_sampler_state_table)(brw, fs, &brw->wm.base); 5215 } 5216 5217 static const struct brw_tracked_state genX(fs_samplers) = { 5218 .dirty = { 5219 .mesa = _NEW_TEXTURE, 5220 .brw = BRW_NEW_BATCH | 5221 BRW_NEW_BLORP | 5222 BRW_NEW_FRAGMENT_PROGRAM, 5223 }, 5224 .emit = genX(upload_fs_samplers), 5225 }; 5226 5227 static void 5228 genX(upload_vs_samplers)(struct brw_context *brw) 5229 { 5230 /* BRW_NEW_VERTEX_PROGRAM */ 5231 struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX]; 5232 genX(upload_sampler_state_table)(brw, vs, &brw->vs.base); 5233 } 5234 5235 static const struct brw_tracked_state genX(vs_samplers) = { 5236 .dirty = { 5237 .mesa = _NEW_TEXTURE, 5238 .brw = BRW_NEW_BATCH | 5239 BRW_NEW_BLORP | 5240 BRW_NEW_VERTEX_PROGRAM, 5241 }, 5242 .emit = genX(upload_vs_samplers), 5243 }; 5244 5245 #if GEN_GEN >= 6 5246 static void 5247 genX(upload_gs_samplers)(struct brw_context *brw) 5248 { 5249 /* BRW_NEW_GEOMETRY_PROGRAM */ 5250 struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY]; 5251 if (!gs) 5252 return; 5253 5254 genX(upload_sampler_state_table)(brw, gs, &brw->gs.base); 5255 } 5256 5257 5258 static const struct brw_tracked_state genX(gs_samplers) = { 5259 .dirty = { 5260 .mesa = _NEW_TEXTURE, 5261 .brw = BRW_NEW_BATCH | 5262 BRW_NEW_BLORP | 5263 BRW_NEW_GEOMETRY_PROGRAM, 5264 }, 5265 .emit = genX(upload_gs_samplers), 5266 }; 5267 #endif 5268 5269 #if GEN_GEN >= 7 5270 static void 5271 genX(upload_tcs_samplers)(struct brw_context *brw) 5272 { 5273 /* BRW_NEW_TESS_PROGRAMS */ 5274 struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL]; 5275 if (!tcs) 5276 return; 5277 5278 genX(upload_sampler_state_table)(brw, tcs, &brw->tcs.base); 5279 } 5280 5281 static const 
struct brw_tracked_state genX(tcs_samplers) = { 5282 .dirty = { 5283 .mesa = _NEW_TEXTURE, 5284 .brw = BRW_NEW_BATCH | 5285 BRW_NEW_BLORP | 5286 BRW_NEW_TESS_PROGRAMS, 5287 }, 5288 .emit = genX(upload_tcs_samplers), 5289 }; 5290 #endif 5291 5292 #if GEN_GEN >= 7 5293 static void 5294 genX(upload_tes_samplers)(struct brw_context *brw) 5295 { 5296 /* BRW_NEW_TESS_PROGRAMS */ 5297 struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL]; 5298 if (!tes) 5299 return; 5300 5301 genX(upload_sampler_state_table)(brw, tes, &brw->tes.base); 5302 } 5303 5304 static const struct brw_tracked_state genX(tes_samplers) = { 5305 .dirty = { 5306 .mesa = _NEW_TEXTURE, 5307 .brw = BRW_NEW_BATCH | 5308 BRW_NEW_BLORP | 5309 BRW_NEW_TESS_PROGRAMS, 5310 }, 5311 .emit = genX(upload_tes_samplers), 5312 }; 5313 #endif 5314 5315 #if GEN_GEN >= 7 5316 static void 5317 genX(upload_cs_samplers)(struct brw_context *brw) 5318 { 5319 /* BRW_NEW_COMPUTE_PROGRAM */ 5320 struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE]; 5321 if (!cs) 5322 return; 5323 5324 genX(upload_sampler_state_table)(brw, cs, &brw->cs.base); 5325 } 5326 5327 const struct brw_tracked_state genX(cs_samplers) = { 5328 .dirty = { 5329 .mesa = _NEW_TEXTURE, 5330 .brw = BRW_NEW_BATCH | 5331 BRW_NEW_BLORP | 5332 BRW_NEW_COMPUTE_PROGRAM, 5333 }, 5334 .emit = genX(upload_cs_samplers), 5335 }; 5336 #endif 5337 5338 /* ---------------------------------------------------------------------- */ 5339 5340 #if GEN_GEN <= 5 5341 5342 static void genX(upload_blend_constant_color)(struct brw_context *brw) 5343 { 5344 struct gl_context *ctx = &brw->ctx; 5345 5346 brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) { 5347 blend_cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0]; 5348 blend_cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1]; 5349 blend_cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2]; 5350 blend_cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3]; 5351 } 5352 } 5353 
5354 static const struct brw_tracked_state genX(blend_constant_color) = { 5355 .dirty = { 5356 .mesa = _NEW_COLOR, 5357 .brw = BRW_NEW_CONTEXT | 5358 BRW_NEW_BLORP, 5359 }, 5360 .emit = genX(upload_blend_constant_color) 5361 }; 5362 #endif 5363 5364 /* ---------------------------------------------------------------------- */ 5365 5366 void 5367 genX(init_atoms)(struct brw_context *brw) 5368 { 5369 #if GEN_GEN < 6 5370 static const struct brw_tracked_state *render_atoms[] = 5371 { 5372 /* Once all the programs are done, we know how large urb entry 5373 * sizes need to be and can decide if we need to change the urb 5374 * layout. 5375 */ 5376 &brw_curbe_offsets, 5377 &brw_recalculate_urb_fence, 5378 5379 &genX(cc_vp), 5380 &genX(color_calc_state), 5381 5382 /* Surface state setup. Must come before the VS/WM unit. The binding 5383 * table upload must be last. 5384 */ 5385 &brw_vs_pull_constants, 5386 &brw_wm_pull_constants, 5387 &brw_renderbuffer_surfaces, 5388 &brw_renderbuffer_read_surfaces, 5389 &brw_texture_surfaces, 5390 &brw_vs_binding_table, 5391 &brw_wm_binding_table, 5392 5393 &genX(fs_samplers), 5394 &genX(vs_samplers), 5395 5396 /* These set up state for brw_psp_urb_cbs */ 5397 &genX(wm_state), 5398 &genX(sf_clip_viewport), 5399 &genX(sf_state), 5400 &genX(vs_state), /* always required, enabled or not */ 5401 &genX(clip_state), 5402 &genX(gs_state), 5403 5404 /* Command packets: 5405 */ 5406 &brw_binding_table_pointers, 5407 &genX(blend_constant_color), 5408 5409 &brw_depthbuffer, 5410 5411 &genX(polygon_stipple), 5412 &genX(polygon_stipple_offset), 5413 5414 &genX(line_stipple), 5415 5416 &brw_psp_urb_cbs, 5417 5418 &genX(drawing_rect), 5419 &brw_indices, /* must come before brw_vertices */ 5420 &genX(index_buffer), 5421 &genX(vertices), 5422 5423 &brw_constant_buffer 5424 }; 5425 #elif GEN_GEN == 6 5426 static const struct brw_tracked_state *render_atoms[] = 5427 { 5428 &genX(sf_clip_viewport), 5429 5430 /* Command packets: */ 5431 5432 &genX(cc_vp), 5433 
5434 &gen6_urb, 5435 &genX(blend_state), /* must do before cc unit */ 5436 &genX(color_calc_state), /* must do before cc unit */ 5437 &genX(depth_stencil_state), /* must do before cc unit */ 5438 5439 &genX(vs_push_constants), /* Before vs_state */ 5440 &genX(gs_push_constants), /* Before gs_state */ 5441 &genX(wm_push_constants), /* Before wm_state */ 5442 5443 /* Surface state setup. Must come before the VS/WM unit. The binding 5444 * table upload must be last. 5445 */ 5446 &brw_vs_pull_constants, 5447 &brw_vs_ubo_surfaces, 5448 &brw_gs_pull_constants, 5449 &brw_gs_ubo_surfaces, 5450 &brw_wm_pull_constants, 5451 &brw_wm_ubo_surfaces, 5452 &gen6_renderbuffer_surfaces, 5453 &brw_renderbuffer_read_surfaces, 5454 &brw_texture_surfaces, 5455 &gen6_sol_surface, 5456 &brw_vs_binding_table, 5457 &gen6_gs_binding_table, 5458 &brw_wm_binding_table, 5459 5460 &genX(fs_samplers), 5461 &genX(vs_samplers), 5462 &genX(gs_samplers), 5463 &gen6_sampler_state, 5464 &genX(multisample_state), 5465 5466 &genX(vs_state), 5467 &genX(gs_state), 5468 &genX(clip_state), 5469 &genX(sf_state), 5470 &genX(wm_state), 5471 5472 &genX(scissor_state), 5473 5474 &gen6_binding_table_pointers, 5475 5476 &brw_depthbuffer, 5477 5478 &genX(polygon_stipple), 5479 &genX(polygon_stipple_offset), 5480 5481 &genX(line_stipple), 5482 5483 &genX(drawing_rect), 5484 5485 &brw_indices, /* must come before brw_vertices */ 5486 &genX(index_buffer), 5487 &genX(vertices), 5488 }; 5489 #elif GEN_GEN == 7 5490 static const struct brw_tracked_state *render_atoms[] = 5491 { 5492 /* Command packets: */ 5493 5494 &genX(cc_vp), 5495 &genX(sf_clip_viewport), 5496 5497 &gen7_l3_state, 5498 &gen7_push_constant_space, 5499 &gen7_urb, 5500 &genX(blend_state), /* must do before cc unit */ 5501 &genX(color_calc_state), /* must do before cc unit */ 5502 &genX(depth_stencil_state), /* must do before cc unit */ 5503 5504 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */ 5505 &brw_tcs_image_surfaces, /* 
Before tcs push/pull constants and binding table */ 5506 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */ 5507 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */ 5508 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */ 5509 5510 &genX(vs_push_constants), /* Before vs_state */ 5511 &genX(tcs_push_constants), 5512 &genX(tes_push_constants), 5513 &genX(gs_push_constants), /* Before gs_state */ 5514 &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */ 5515 5516 /* Surface state setup. Must come before the VS/WM unit. The binding 5517 * table upload must be last. 5518 */ 5519 &brw_vs_pull_constants, 5520 &brw_vs_ubo_surfaces, 5521 &brw_tcs_pull_constants, 5522 &brw_tcs_ubo_surfaces, 5523 &brw_tes_pull_constants, 5524 &brw_tes_ubo_surfaces, 5525 &brw_gs_pull_constants, 5526 &brw_gs_ubo_surfaces, 5527 &brw_wm_pull_constants, 5528 &brw_wm_ubo_surfaces, 5529 &gen6_renderbuffer_surfaces, 5530 &brw_renderbuffer_read_surfaces, 5531 &brw_texture_surfaces, 5532 5533 &genX(push_constant_packets), 5534 5535 &brw_vs_binding_table, 5536 &brw_tcs_binding_table, 5537 &brw_tes_binding_table, 5538 &brw_gs_binding_table, 5539 &brw_wm_binding_table, 5540 5541 &genX(fs_samplers), 5542 &genX(vs_samplers), 5543 &genX(tcs_samplers), 5544 &genX(tes_samplers), 5545 &genX(gs_samplers), 5546 &genX(multisample_state), 5547 5548 &genX(vs_state), 5549 &genX(hs_state), 5550 &genX(te_state), 5551 &genX(ds_state), 5552 &genX(gs_state), 5553 &genX(sol_state), 5554 &genX(clip_state), 5555 &genX(sbe_state), 5556 &genX(sf_state), 5557 &genX(wm_state), 5558 &genX(ps_state), 5559 5560 &genX(scissor_state), 5561 5562 &gen7_depthbuffer, 5563 5564 &genX(polygon_stipple), 5565 &genX(polygon_stipple_offset), 5566 5567 &genX(line_stipple), 5568 5569 &genX(drawing_rect), 5570 5571 &brw_indices, /* must come before brw_vertices */ 5572 &genX(index_buffer), 5573 &genX(vertices), 5574 5575 #if GEN_IS_HASWELL 5576 
&genX(cut_index), 5577 #endif 5578 }; 5579 #elif GEN_GEN >= 8 5580 static const struct brw_tracked_state *render_atoms[] = 5581 { 5582 &genX(cc_vp), 5583 &genX(sf_clip_viewport), 5584 5585 &gen7_l3_state, 5586 &gen7_push_constant_space, 5587 &gen7_urb, 5588 &genX(blend_state), 5589 &genX(color_calc_state), 5590 5591 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */ 5592 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */ 5593 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */ 5594 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */ 5595 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */ 5596 5597 &genX(vs_push_constants), /* Before vs_state */ 5598 &genX(tcs_push_constants), 5599 &genX(tes_push_constants), 5600 &genX(gs_push_constants), /* Before gs_state */ 5601 &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */ 5602 5603 /* Surface state setup. Must come before the VS/WM unit. The binding 5604 * table upload must be last. 
5605 */ 5606 &brw_vs_pull_constants, 5607 &brw_vs_ubo_surfaces, 5608 &brw_tcs_pull_constants, 5609 &brw_tcs_ubo_surfaces, 5610 &brw_tes_pull_constants, 5611 &brw_tes_ubo_surfaces, 5612 &brw_gs_pull_constants, 5613 &brw_gs_ubo_surfaces, 5614 &brw_wm_pull_constants, 5615 &brw_wm_ubo_surfaces, 5616 &gen6_renderbuffer_surfaces, 5617 &brw_renderbuffer_read_surfaces, 5618 &brw_texture_surfaces, 5619 5620 &genX(push_constant_packets), 5621 5622 &brw_vs_binding_table, 5623 &brw_tcs_binding_table, 5624 &brw_tes_binding_table, 5625 &brw_gs_binding_table, 5626 &brw_wm_binding_table, 5627 5628 &genX(fs_samplers), 5629 &genX(vs_samplers), 5630 &genX(tcs_samplers), 5631 &genX(tes_samplers), 5632 &genX(gs_samplers), 5633 &genX(multisample_state), 5634 5635 &genX(vs_state), 5636 &genX(hs_state), 5637 &genX(te_state), 5638 &genX(ds_state), 5639 &genX(gs_state), 5640 &genX(sol_state), 5641 &genX(clip_state), 5642 &genX(raster_state), 5643 &genX(sbe_state), 5644 &genX(sf_state), 5645 &genX(ps_blend), 5646 &genX(ps_extra), 5647 &genX(ps_state), 5648 &genX(depth_stencil_state), 5649 &genX(wm_state), 5650 5651 &genX(scissor_state), 5652 5653 &gen7_depthbuffer, 5654 5655 &genX(polygon_stipple), 5656 &genX(polygon_stipple_offset), 5657 5658 &genX(line_stipple), 5659 5660 &genX(drawing_rect), 5661 5662 &genX(vf_topology), 5663 5664 &brw_indices, 5665 &genX(index_buffer), 5666 &genX(vertices), 5667 5668 &genX(cut_index), 5669 &gen8_pma_fix, 5670 }; 5671 #endif 5672 5673 STATIC_ASSERT(ARRAY_SIZE(render_atoms) <= ARRAY_SIZE(brw->render_atoms)); 5674 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE, 5675 render_atoms, ARRAY_SIZE(render_atoms)); 5676 5677 #if GEN_GEN >= 7 5678 static const struct brw_tracked_state *compute_atoms[] = 5679 { 5680 &gen7_l3_state, 5681 &brw_cs_image_surfaces, 5682 &genX(cs_push_constants), 5683 &genX(cs_pull_constants), 5684 &brw_cs_ubo_surfaces, 5685 &brw_cs_texture_surfaces, 5686 &brw_cs_work_groups_surface, 5687 &genX(cs_samplers), 5688 &genX(cs_state), 5689 }; 
5690 5691 STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms)); 5692 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE, 5693 compute_atoms, ARRAY_SIZE(compute_atoms)); 5694 5695 brw->vtbl.emit_mi_report_perf_count = genX(emit_mi_report_perf_count); 5696 #endif 5697 } 5698