Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2009 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *    Eric Anholt <eric (at) anholt.net>
     25  *
     26  */
     27 
     28 #include "brw_context.h"
     29 #include "brw_state.h"
     30 #include "brw_defines.h"
     31 #include "brw_util.h"
     32 #include "compiler/nir/nir.h"
     33 #include "main/macros.h"
     34 #include "main/fbobject.h"
     35 #include "main/framebuffer.h"
     36 #include "intel_batchbuffer.h"
     37 
     38 /**
     39  * Determine the appropriate attribute override value to store into the
     40  * 3DSTATE_SF structure for a given fragment shader attribute.  The attribute
     41  * override value contains two pieces of information: the location of the
     42  * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
     43  * flag indicating whether to "swizzle" the attribute based on the direction
     44  * the triangle is facing.
     45  *
     46  * If an attribute is "swizzled", then the given VUE location is used for
     47  * front-facing triangles, and the VUE location that immediately follows is
     48  * used for back-facing triangles.  We use this to implement the mapping from
     49  * gl_FrontColor/gl_BackColor to gl_Color.
     50  *
     51  * urb_entry_read_offset is the offset into the VUE at which the SF unit is
     52  * being instructed to begin reading attribute data.  It can be set to a
     53  * nonzero value to prevent the SF unit from wasting time reading elements of
     54  * the VUE that are not needed by the fragment shader.  It is measured in
     55  * 256-bit increments.
     56  */
     57 static uint32_t
     58 get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
     59                   int fs_attr, bool two_side_color, uint32_t *max_source_attr)
     60 {
     61    /* Find the VUE slot for this attribute. */
     62    int slot = vue_map->varying_to_slot[fs_attr];
     63 
     64    /* Viewport and Layer are stored in the VUE header.  We need to override
     65     * them to zero if earlier stages didn't write them, as GL requires that
     66     * they read back as zero when not explicitly set.
     67     */
     68    if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
     69       unsigned override =
     70          ATTRIBUTE_0_OVERRIDE_X | ATTRIBUTE_0_OVERRIDE_W |
     71          ATTRIBUTE_CONST_0000 << ATTRIBUTE_0_CONST_SOURCE_SHIFT;
     72 
     73       if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
     74          override |= ATTRIBUTE_0_OVERRIDE_Y;
     75       if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
     76          override |= ATTRIBUTE_0_OVERRIDE_Z;
     77 
     78       return override;
     79    }
     80 
     81    /* If there was only a back color written but not front, use back
     82     * as the color instead of undefined
     83     */
     84    if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
     85       slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
     86    if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
     87       slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
     88 
     89    if (slot == -1) {
     90       /* This attribute does not exist in the VUE--that means that the vertex
     91        * shader did not write to it.  This means that either:
     92        *
     93        * (a) This attribute is a texture coordinate, and it is going to be
     94        * replaced with point coordinates (as a consequence of a call to
     95        * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
     96        * hardware will ignore whatever attribute override we supply.
     97        *
     98        * (b) This attribute is read by the fragment shader but not written by
     99        * the vertex shader, so its value is undefined.  Therefore the
    100        * attribute override we supply doesn't matter.
    101        *
    102        * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
    103        * previous shader stage.
    104        *
    105        * Note that we don't have to worry about the cases where the attribute
    106        * is gl_PointCoord or is undergoing point sprite coordinate
    107        * replacement, because in those cases, this function isn't called.
    108        *
    109        * In case (c), we need to program the attribute overrides so that the
    110        * primitive ID will be stored in this slot.  In every other case, the
    111        * attribute override we supply doesn't matter.  So just go ahead and
    112        * program primitive ID in every case.
    113        */
    114       return (ATTRIBUTE_0_OVERRIDE_W |
    115               ATTRIBUTE_0_OVERRIDE_Z |
    116               ATTRIBUTE_0_OVERRIDE_Y |
    117               ATTRIBUTE_0_OVERRIDE_X |
    118               (ATTRIBUTE_CONST_PRIM_ID << ATTRIBUTE_0_CONST_SOURCE_SHIFT));
    119    }
    120 
    121    /* Compute the location of the attribute relative to urb_entry_read_offset.
    122     * Each increment of urb_entry_read_offset represents a 256-bit value, so
    123     * it counts for two 128-bit VUE slots.
    124     */
    125    int source_attr = slot - 2 * urb_entry_read_offset;
    126    assert(source_attr >= 0 && source_attr < 32);
    127 
    128    /* If we are doing two-sided color, and the VUE slot following this one
    129     * represents a back-facing color, then we need to instruct the SF unit to
    130     * do back-facing swizzling.
    131     */
    132    bool swizzling = two_side_color &&
    133       ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
    134         vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
    135        (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
    136         vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
    137 
    138    /* Update max_source_attr.  If swizzling, the SF will read this slot + 1. */
    139    if (*max_source_attr < source_attr + swizzling)
    140       *max_source_attr = source_attr + swizzling;
    141 
    142    if (swizzling) {
    143       return source_attr |
    144          (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
    145    }
    146 
    147    return source_attr;
    148 }
    149 
    150 
    151 /**
    152  * Create the mapping from the FS inputs we produce to the previous pipeline
    153  * stage (GS or VS) outputs they source from.
    154  */
    155 void
    156 calculate_attr_overrides(const struct brw_context *brw,
    157                          uint16_t *attr_overrides,
    158                          uint32_t *point_sprite_enables,
    159                          uint32_t *urb_entry_read_length,
    160                          uint32_t *urb_entry_read_offset)
    161 {
    162    /* BRW_NEW_FS_PROG_DATA */
    163    const struct brw_wm_prog_data *wm_prog_data =
    164       brw_wm_prog_data(brw->wm.base.prog_data);
    165    uint32_t max_source_attr = 0;
    166 
    167    *point_sprite_enables = 0;
    168 
    169    *urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
    170 
    171    /* BRW_NEW_FRAGMENT_PROGRAM
    172     *
    173     * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
    174     * the full vertex header.  Otherwise, we can program the SF to start
    175     * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
    176     * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
    177     * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
    178     */
    179 
    180    bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
    181       (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
    182 
    183    *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
    184 
    185    /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
    186     * description of dw10 Point Sprite Texture Coordinate Enable:
    187     *
    188     * "This field must be programmed to zero when non-point primitives
    189     * are rendered."
    190     *
    191     * The SandyBridge PRM doesn't explicitly say that point sprite enables
    192     * must be programmed to zero when rendering non-point primitives, but
    193     * the IvyBridge PRM does, and if we don't, we get garbage.
    194     *
    195     * This is not required on Haswell, as the hardware ignores this state
    196     * when drawing non-points -- although we do still need to be careful to
    197     * correctly set the attr overrides.
    198     *
    199     * _NEW_POLYGON
    200     * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
    201     */
    202    bool drawing_points = brw_is_drawing_points(brw);
    203 
    204    /* Initialize all the attr_overrides to 0.  In the loop below we'll modify
    205     * just the ones that correspond to inputs used by the fs.
    206     */
    207    memset(attr_overrides, 0, 16*sizeof(*attr_overrides));
    208 
    209    for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
    210       int input_index = wm_prog_data->urb_setup[attr];
    211 
    212       if (input_index < 0)
    213 	 continue;
    214 
    215       /* _NEW_POINT */
    216       bool point_sprite = false;
    217       if (drawing_points) {
    218          if (brw->ctx.Point.PointSprite &&
    219              (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
    220              (brw->ctx.Point.CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) {
    221             point_sprite = true;
    222          }
    223 
    224          if (attr == VARYING_SLOT_PNTC)
    225             point_sprite = true;
    226 
    227          if (point_sprite)
    228             *point_sprite_enables |= (1 << input_index);
    229       }
    230 
    231       /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
    232       uint16_t attr_override = point_sprite ? 0 :
    233          get_attr_override(&brw->vue_map_geom_out,
    234 			   *urb_entry_read_offset, attr,
    235                            brw->ctx.VertexProgram._TwoSideEnabled,
    236                            &max_source_attr);
    237 
    238       /* The hardware can only do the overrides on 16 overrides at a
    239        * time, and the other up to 16 have to be lined up so that the
    240        * input index = the output index.  We'll need to do some
    241        * tweaking to make sure that's the case.
    242        */
    243       if (input_index < 16)
    244          attr_overrides[input_index] = attr_override;
    245       else
    246          assert(attr_override == input_index);
    247    }
    248 
    249    /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
    250     * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
    251     *
    252     * "This field should be set to the minimum length required to read the
    253     *  maximum source attribute.  The maximum source attribute is indicated
    254     *  by the maximum value of the enabled Attribute # Source Attribute if
    255     *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    256     *  enable is not set.
    257     *  read_length = ceiling((max_source_attr + 1) / 2)
    258     *
    259     *  [errata] Corruption/Hang possible if length programmed larger than
    260     *  recommended"
    261     *
    262     * Similar text exists for Ivy Bridge.
    263     */
    264    *urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
    265 }
    266 
    267 
    268 static void
    269 upload_sf_state(struct brw_context *brw)
    270 {
    271    struct gl_context *ctx = &brw->ctx;
    272    /* BRW_NEW_FS_PROG_DATA */
    273    const struct brw_wm_prog_data *wm_prog_data =
    274       brw_wm_prog_data(brw->wm.base.prog_data);
    275    uint32_t num_outputs = wm_prog_data->num_varying_inputs;
    276    uint32_t dw1, dw2, dw3, dw4;
    277    uint32_t point_sprite_enables;
    278    int i;
    279    /* _NEW_BUFFER */
    280    bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
    281    const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
    282 
    283    float point_size;
    284    uint16_t attr_overrides[16];
    285    uint32_t point_sprite_origin;
    286 
    287    dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;
    288    dw2 = GEN6_SF_STATISTICS_ENABLE;
    289    dw3 = GEN6_SF_SCISSOR_ENABLE;
    290    dw4 = 0;
    291 
    292    if (brw->sf.viewport_transform_enable)
    293        dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
    294 
    295    /* _NEW_POLYGON */
    296    if (ctx->Polygon._FrontBit == render_to_fbo)
    297       dw2 |= GEN6_SF_WINDING_CCW;
    298 
    299    if (ctx->Polygon.OffsetFill)
    300        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
    301 
    302    if (ctx->Polygon.OffsetLine)
    303        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
    304 
    305    if (ctx->Polygon.OffsetPoint)
    306        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
    307 
    308    switch (ctx->Polygon.FrontMode) {
    309    case GL_FILL:
    310        dw2 |= GEN6_SF_FRONT_SOLID;
    311        break;
    312 
    313    case GL_LINE:
    314        dw2 |= GEN6_SF_FRONT_WIREFRAME;
    315        break;
    316 
    317    case GL_POINT:
    318        dw2 |= GEN6_SF_FRONT_POINT;
    319        break;
    320 
    321    default:
    322        unreachable("not reached");
    323    }
    324 
    325    switch (ctx->Polygon.BackMode) {
    326    case GL_FILL:
    327        dw2 |= GEN6_SF_BACK_SOLID;
    328        break;
    329 
    330    case GL_LINE:
    331        dw2 |= GEN6_SF_BACK_WIREFRAME;
    332        break;
    333 
    334    case GL_POINT:
    335        dw2 |= GEN6_SF_BACK_POINT;
    336        break;
    337 
    338    default:
    339        unreachable("not reached");
    340    }
    341 
    342    /* _NEW_POLYGON */
    343    if (ctx->Polygon.CullFlag) {
    344       switch (ctx->Polygon.CullFaceMode) {
    345       case GL_FRONT:
    346 	 dw3 |= GEN6_SF_CULL_FRONT;
    347 	 break;
    348       case GL_BACK:
    349 	 dw3 |= GEN6_SF_CULL_BACK;
    350 	 break;
    351       case GL_FRONT_AND_BACK:
    352 	 dw3 |= GEN6_SF_CULL_BOTH;
    353 	 break;
    354       default:
    355 	 unreachable("not reached");
    356       }
    357    } else {
    358       dw3 |= GEN6_SF_CULL_NONE;
    359    }
    360 
    361    /* _NEW_LINE */
    362    {
    363       uint32_t line_width_u3_7 = brw_get_line_width(brw);
    364       dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
    365    }
    366    if (ctx->Line.SmoothFlag) {
    367       dw3 |= GEN6_SF_LINE_AA_ENABLE;
    368       dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
    369       dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
    370    }
    371    /* _NEW_MULTISAMPLE */
    372    if (multisampled_fbo && ctx->Multisample.Enabled)
    373       dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
    374 
    375    /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
    376    if (use_state_point_size(brw))
    377       dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
    378 
    379    /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
    380    point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
    381 
    382    /* Clamp to the hardware limits and convert to fixed point */
    383    dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
    384 
    385    /*
    386     * Window coordinates in an FBO are inverted, which means point
    387     * sprite origin must be inverted, too.
    388     */
    389    if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
    390       point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
    391    } else {
    392       point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
    393    }
    394    dw1 |= point_sprite_origin;
    395 
    396    /* _NEW_LIGHT */
    397    if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
    398       dw4 |=
    399 	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
    400 	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
    401 	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
    402    } else {
    403       dw4 |=
    404 	 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
    405    }
    406 
    407    /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
    408     * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
    409     */
    410    uint32_t urb_entry_read_length;
    411    uint32_t urb_entry_read_offset;
    412    calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
    413                             &urb_entry_read_length, &urb_entry_read_offset);
    414    dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
    415            urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
    416 
    417    BEGIN_BATCH(20);
    418    OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
    419    OUT_BATCH(dw1);
    420    OUT_BATCH(dw2);
    421    OUT_BATCH(dw3);
    422    OUT_BATCH(dw4);
    423    OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
    424    OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
    425    OUT_BATCH_F(ctx->Polygon.OffsetClamp); /* global depth offset clamp */
    426    for (i = 0; i < 8; i++) {
    427       OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
    428    }
    429    OUT_BATCH(point_sprite_enables); /* dw16 */
    430    OUT_BATCH(wm_prog_data->flat_inputs);
    431    OUT_BATCH(0); /* wrapshortest enables 0-7 */
    432    OUT_BATCH(0); /* wrapshortest enables 8-15 */
    433    ADVANCE_BATCH();
    434 }
    435 
    436 const struct brw_tracked_state gen6_sf_state = {
    437    .dirty = {
    438       .mesa  = _NEW_BUFFERS |
    439                _NEW_LIGHT |
    440                _NEW_LINE |
    441                _NEW_MULTISAMPLE |
    442                _NEW_POINT |
    443                _NEW_POLYGON |
    444                _NEW_PROGRAM,
    445       .brw   = BRW_NEW_BLORP |
    446                BRW_NEW_CONTEXT |
    447                BRW_NEW_FRAGMENT_PROGRAM |
    448                BRW_NEW_FS_PROG_DATA |
    449                BRW_NEW_GS_PROG_DATA |
    450                BRW_NEW_PRIMITIVE |
    451                BRW_NEW_TES_PROG_DATA |
    452                BRW_NEW_VUE_MAP_GEOM_OUT,
    453    },
    454    .emit = upload_sf_state,
    455 };
    456