1 /* 2 * Copyright 2009 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric (at) anholt.net> 25 * 26 */ 27 28 #include "brw_context.h" 29 #include "brw_state.h" 30 #include "brw_defines.h" 31 #include "brw_util.h" 32 #include "compiler/nir/nir.h" 33 #include "main/macros.h" 34 #include "main/fbobject.h" 35 #include "main/framebuffer.h" 36 #include "intel_batchbuffer.h" 37 38 /** 39 * Determine the appropriate attribute override value to store into the 40 * 3DSTATE_SF structure for a given fragment shader attribute. The attribute 41 * override value contains two pieces of information: the location of the 42 * attribute in the VUE (relative to urb_entry_read_offset, see below), and a 43 * flag indicating whether to "swizzle" the attribute based on the direction 44 * the triangle is facing. 45 * 46 * If an attribute is "swizzled", then the given VUE location is used for 47 * front-facing triangles, and the VUE location that immediately follows is 48 * used for back-facing triangles. We use this to implement the mapping from 49 * gl_FrontColor/gl_BackColor to gl_Color. 50 * 51 * urb_entry_read_offset is the offset into the VUE at which the SF unit is 52 * being instructed to begin reading attribute data. It can be set to a 53 * nonzero value to prevent the SF unit from wasting time reading elements of 54 * the VUE that are not needed by the fragment shader. It is measured in 55 * 256-bit increments. 56 */ 57 static uint32_t 58 get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset, 59 int fs_attr, bool two_side_color, uint32_t *max_source_attr) 60 { 61 /* Find the VUE slot for this attribute. */ 62 int slot = vue_map->varying_to_slot[fs_attr]; 63 64 /* Viewport and Layer are stored in the VUE header. We need to override 65 * them to zero if earlier stages didn't write them, as GL requires that 66 * they read back as zero when not explicitly set. 67 */ 68 if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) { 69 unsigned override = 70 ATTRIBUTE_0_OVERRIDE_X | ATTRIBUTE_0_OVERRIDE_W | 71 ATTRIBUTE_CONST_0000 << ATTRIBUTE_0_CONST_SOURCE_SHIFT; 72 73 if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) 74 override |= ATTRIBUTE_0_OVERRIDE_Y; 75 if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) 76 override |= ATTRIBUTE_0_OVERRIDE_Z; 77 78 return override; 79 } 80 81 /* If there was only a back color written but not front, use back 82 * as the color instead of undefined 83 */ 84 if (slot == -1 && fs_attr == VARYING_SLOT_COL0) 85 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0]; 86 if (slot == -1 && fs_attr == VARYING_SLOT_COL1) 87 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1]; 88 89 if (slot == -1) { 90 /* This attribute does not exist in the VUE--that means that the vertex 91 * shader did not write to it. This means that either: 92 * 93 * (a) This attribute is a texture coordinate, and it is going to be 94 * replaced with point coordinates (as a consequence of a call to 95 * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the 96 * hardware will ignore whatever attribute override we supply. 97 * 98 * (b) This attribute is read by the fragment shader but not written by 99 * the vertex shader, so its value is undefined. Therefore the 100 * attribute override we supply doesn't matter. 101 * 102 * (c) This attribute is gl_PrimitiveID, and it wasn't written by the 103 * previous shader stage. 104 * 105 * Note that we don't have to worry about the cases where the attribute 106 * is gl_PointCoord or is undergoing point sprite coordinate 107 * replacement, because in those cases, this function isn't called. 108 * 109 * In case (c), we need to program the attribute overrides so that the 110 * primitive ID will be stored in this slot. In every other case, the 111 * attribute override we supply doesn't matter. So just go ahead and 112 * program primitive ID in every case. 113 */ 114 return (ATTRIBUTE_0_OVERRIDE_W | 115 ATTRIBUTE_0_OVERRIDE_Z | 116 ATTRIBUTE_0_OVERRIDE_Y | 117 ATTRIBUTE_0_OVERRIDE_X | 118 (ATTRIBUTE_CONST_PRIM_ID << ATTRIBUTE_0_CONST_SOURCE_SHIFT)); 119 } 120 121 /* Compute the location of the attribute relative to urb_entry_read_offset. 122 * Each increment of urb_entry_read_offset represents a 256-bit value, so 123 * it counts for two 128-bit VUE slots. 124 */ 125 int source_attr = slot - 2 * urb_entry_read_offset; 126 assert(source_attr >= 0 && source_attr < 32); 127 128 /* If we are doing two-sided color, and the VUE slot following this one 129 * represents a back-facing color, then we need to instruct the SF unit to 130 * do back-facing swizzling. 131 */ 132 bool swizzling = two_side_color && 133 ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 && 134 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) || 135 (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 && 136 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1)); 137 138 /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */ 139 if (*max_source_attr < source_attr + swizzling) 140 *max_source_attr = source_attr + swizzling; 141 142 if (swizzling) { 143 return source_attr | 144 (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); 145 } 146 147 return source_attr; 148 } 149 150 151 /** 152 * Create the mapping from the FS inputs we produce to the previous pipeline 153 * stage (GS or VS) outputs they source from. 154 */ 155 void 156 calculate_attr_overrides(const struct brw_context *brw, 157 uint16_t *attr_overrides, 158 uint32_t *point_sprite_enables, 159 uint32_t *urb_entry_read_length, 160 uint32_t *urb_entry_read_offset) 161 { 162 /* BRW_NEW_FS_PROG_DATA */ 163 const struct brw_wm_prog_data *wm_prog_data = 164 brw_wm_prog_data(brw->wm.base.prog_data); 165 uint32_t max_source_attr = 0; 166 167 *point_sprite_enables = 0; 168 169 *urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; 170 171 /* BRW_NEW_FRAGMENT_PROGRAM 172 * 173 * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in 174 * the full vertex header. Otherwise, we can program the SF to start 175 * reading at an offset of 1 (2 varying slots) to skip unnecessary data: 176 * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5 177 * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+ 178 */ 179 180 bool fs_needs_vue_header = brw->fragment_program->info.inputs_read & 181 (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); 182 183 *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1; 184 185 /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE, 186 * description of dw10 Point Sprite Texture Coordinate Enable: 187 * 188 * "This field must be programmed to zero when non-point primitives 189 * are rendered." 190 * 191 * The SandyBridge PRM doesn't explicitly say that point sprite enables 192 * must be programmed to zero when rendering non-point primitives, but 193 * the IvyBridge PRM does, and if we don't, we get garbage. 194 * 195 * This is not required on Haswell, as the hardware ignores this state 196 * when drawing non-points -- although we do still need to be careful to 197 * correctly set the attr overrides. 198 * 199 * _NEW_POLYGON 200 * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA 201 */ 202 bool drawing_points = brw_is_drawing_points(brw); 203 204 /* Initialize all the attr_overrides to 0. In the loop below we'll modify 205 * just the ones that correspond to inputs used by the fs. 206 */ 207 memset(attr_overrides, 0, 16*sizeof(*attr_overrides)); 208 209 for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { 210 int input_index = wm_prog_data->urb_setup[attr]; 211 212 if (input_index < 0) 213 continue; 214 215 /* _NEW_POINT */ 216 bool point_sprite = false; 217 if (drawing_points) { 218 if (brw->ctx.Point.PointSprite && 219 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) && 220 (brw->ctx.Point.CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) { 221 point_sprite = true; 222 } 223 224 if (attr == VARYING_SLOT_PNTC) 225 point_sprite = true; 226 227 if (point_sprite) 228 *point_sprite_enables |= (1 << input_index); 229 } 230 231 /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */ 232 uint16_t attr_override = point_sprite ? 0 : 233 get_attr_override(&brw->vue_map_geom_out, 234 *urb_entry_read_offset, attr, 235 brw->ctx.VertexProgram._TwoSideEnabled, 236 &max_source_attr); 237 238 /* The hardware can only do the overrides on 16 overrides at a 239 * time, and the other up to 16 have to be lined up so that the 240 * input index = the output index. We'll need to do some 241 * tweaking to make sure that's the case. 242 */ 243 if (input_index < 16) 244 attr_overrides[input_index] = attr_override; 245 else 246 assert(attr_override == input_index); 247 } 248 249 /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for 250 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": 251 * 252 * "This field should be set to the minimum length required to read the 253 * maximum source attribute. The maximum source attribute is indicated 254 * by the maximum value of the enabled Attribute # Source Attribute if 255 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if 256 * enable is not set. 257 * read_length = ceiling((max_source_attr + 1) / 2) 258 * 259 * [errata] Corruption/Hang possible if length programmed larger than 260 * recommended" 261 * 262 * Similar text exists for Ivy Bridge. 263 */ 264 *urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2; 265 } 266 267 268 static void 269 upload_sf_state(struct brw_context *brw) 270 { 271 struct gl_context *ctx = &brw->ctx; 272 /* BRW_NEW_FS_PROG_DATA */ 273 const struct brw_wm_prog_data *wm_prog_data = 274 brw_wm_prog_data(brw->wm.base.prog_data); 275 uint32_t num_outputs = wm_prog_data->num_varying_inputs; 276 uint32_t dw1, dw2, dw3, dw4; 277 uint32_t point_sprite_enables; 278 int i; 279 /* _NEW_BUFFER */ 280 bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); 281 const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; 282 283 float point_size; 284 uint16_t attr_overrides[16]; 285 uint32_t point_sprite_origin; 286 287 dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT; 288 dw2 = GEN6_SF_STATISTICS_ENABLE; 289 dw3 = GEN6_SF_SCISSOR_ENABLE; 290 dw4 = 0; 291 292 if (brw->sf.viewport_transform_enable) 293 dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; 294 295 /* _NEW_POLYGON */ 296 if (ctx->Polygon._FrontBit == render_to_fbo) 297 dw2 |= GEN6_SF_WINDING_CCW; 298 299 if (ctx->Polygon.OffsetFill) 300 dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; 301 302 if (ctx->Polygon.OffsetLine) 303 dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME; 304 305 if (ctx->Polygon.OffsetPoint) 306 dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; 307 308 switch (ctx->Polygon.FrontMode) { 309 case GL_FILL: 310 dw2 |= GEN6_SF_FRONT_SOLID; 311 break; 312 313 case GL_LINE: 314 dw2 |= GEN6_SF_FRONT_WIREFRAME; 315 break; 316 317 case GL_POINT: 318 dw2 |= GEN6_SF_FRONT_POINT; 319 break; 320 321 default: 322 unreachable("not reached"); 323 } 324 325 switch (ctx->Polygon.BackMode) { 326 case GL_FILL: 327 dw2 |= GEN6_SF_BACK_SOLID; 328 break; 329 330 case GL_LINE: 331 dw2 |= GEN6_SF_BACK_WIREFRAME; 332 break; 333 334 case GL_POINT: 335 dw2 |= GEN6_SF_BACK_POINT; 336 break; 337 338 default: 339 unreachable("not reached"); 340 } 341 342 /* _NEW_POLYGON */ 343 if (ctx->Polygon.CullFlag) { 344 switch (ctx->Polygon.CullFaceMode) { 345 case GL_FRONT: 346 dw3 |= GEN6_SF_CULL_FRONT; 347 break; 348 case GL_BACK: 349 dw3 |= GEN6_SF_CULL_BACK; 350 break; 351 case GL_FRONT_AND_BACK: 352 dw3 |= GEN6_SF_CULL_BOTH; 353 break; 354 default: 355 unreachable("not reached"); 356 } 357 } else { 358 dw3 |= GEN6_SF_CULL_NONE; 359 } 360 361 /* _NEW_LINE */ 362 { 363 uint32_t line_width_u3_7 = brw_get_line_width(brw); 364 dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT; 365 } 366 if (ctx->Line.SmoothFlag) { 367 dw3 |= GEN6_SF_LINE_AA_ENABLE; 368 dw3 |= GEN6_SF_LINE_AA_MODE_TRUE; 369 dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0; 370 } 371 /* _NEW_MULTISAMPLE */ 372 if (multisampled_fbo && ctx->Multisample.Enabled) 373 dw3 |= GEN6_SF_MSRAST_ON_PATTERN; 374 375 /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */ 376 if (use_state_point_size(brw)) 377 dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH; 378 379 /* _NEW_POINT - Clamp to ARB_point_parameters user limits */ 380 point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); 381 382 /* Clamp to the hardware limits and convert to fixed point */ 383 dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3); 384 385 /* 386 * Window coordinates in an FBO are inverted, which means point 387 * sprite origin must be inverted, too. 388 */ 389 if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) { 390 point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; 391 } else { 392 point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; 393 } 394 dw1 |= point_sprite_origin; 395 396 /* _NEW_LIGHT */ 397 if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { 398 dw4 |= 399 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) | 400 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) | 401 (1 << GEN6_SF_LINE_PROVOKE_SHIFT); 402 } else { 403 dw4 |= 404 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); 405 } 406 407 /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM | 408 * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA 409 */ 410 uint32_t urb_entry_read_length; 411 uint32_t urb_entry_read_offset; 412 calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, 413 &urb_entry_read_length, &urb_entry_read_offset); 414 dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | 415 urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); 416 417 BEGIN_BATCH(20); 418 OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); 419 OUT_BATCH(dw1); 420 OUT_BATCH(dw2); 421 OUT_BATCH(dw3); 422 OUT_BATCH(dw4); 423 OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */ 424 OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */ 425 OUT_BATCH_F(ctx->Polygon.OffsetClamp); /* global depth offset clamp */ 426 for (i = 0; i < 8; i++) { 427 OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); 428 } 429 OUT_BATCH(point_sprite_enables); /* dw16 */ 430 OUT_BATCH(wm_prog_data->flat_inputs); 431 OUT_BATCH(0); /* wrapshortest enables 0-7 */ 432 OUT_BATCH(0); /* wrapshortest enables 8-15 */ 433 ADVANCE_BATCH(); 434 } 435 436 const struct brw_tracked_state gen6_sf_state = { 437 .dirty = { 438 .mesa = _NEW_BUFFERS | 439 _NEW_LIGHT | 440 _NEW_LINE | 441 _NEW_MULTISAMPLE | 442 _NEW_POINT | 443 _NEW_POLYGON | 444 _NEW_PROGRAM, 445 .brw = BRW_NEW_BLORP | 446 BRW_NEW_CONTEXT | 447 BRW_NEW_FRAGMENT_PROGRAM | 448 BRW_NEW_FS_PROG_DATA | 449 BRW_NEW_GS_PROG_DATA | 450 BRW_NEW_PRIMITIVE | 451 BRW_NEW_TES_PROG_DATA | 452 BRW_NEW_VUE_MAP_GEOM_OUT, 453 }, 454 .emit = upload_sf_state, 455 }; 456