/*
 * Copyright © 2009 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */
     27 
     28 #include "brw_context.h"
     29 #include "brw_state.h"
     30 #include "brw_defines.h"
     31 #include "brw_util.h"
     32 #include "main/macros.h"
     33 #include "main/fbobject.h"
     34 #include "intel_batchbuffer.h"
     35 
     36 /**
     37  * Determine the appropriate attribute override value to store into the
     38  * 3DSTATE_SF structure for a given fragment shader attribute.  The attribute
     39  * override value contains two pieces of information: the location of the
     40  * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
     41  * flag indicating whether to "swizzle" the attribute based on the direction
     42  * the triangle is facing.
     43  *
     44  * If an attribute is "swizzled", then the given VUE location is used for
     45  * front-facing triangles, and the VUE location that immediately follows is
     46  * used for back-facing triangles.  We use this to implement the mapping from
     47  * gl_FrontColor/gl_BackColor to gl_Color.
     48  *
     49  * urb_entry_read_offset is the offset into the VUE at which the SF unit is
     50  * being instructed to begin reading attribute data.  It can be set to a
     51  * nonzero value to prevent the SF unit from wasting time reading elements of
     52  * the VUE that are not needed by the fragment shader.  It is measured in
     53  * 256-bit increments.
     54  */
     55 uint32_t
     56 get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
     57                   int fs_attr, bool two_side_color, uint32_t *max_source_attr)
     58 {
     59    int vs_attr = _mesa_frag_attrib_to_vert_result(fs_attr);
     60    if (vs_attr < 0 || vs_attr == VERT_RESULT_HPOS) {
     61       /* These attributes will be overwritten by the fragment shader's
     62        * interpolation code (see emit_interp() in brw_wm_fp.c), so just let
     63        * them reference the first available attribute.
     64        */
     65       return 0;
     66    }
     67 
     68    /* Find the VUE slot for this attribute. */
     69    int slot = vue_map->vert_result_to_slot[vs_attr];
     70 
     71    /* If there was only a back color written but not front, use back
     72     * as the color instead of undefined
     73     */
     74    if (slot == -1 && vs_attr == VERT_RESULT_COL0)
     75       slot = vue_map->vert_result_to_slot[VERT_RESULT_BFC0];
     76    if (slot == -1 && vs_attr == VERT_RESULT_COL1)
     77       slot = vue_map->vert_result_to_slot[VERT_RESULT_BFC1];
     78 
     79    if (slot == -1) {
     80       /* This attribute does not exist in the VUE--that means that the vertex
     81        * shader did not write to it.  Behavior is undefined in this case, so
     82        * just reference the first available attribute.
     83        */
     84       return 0;
     85    }
     86 
     87    /* Compute the location of the attribute relative to urb_entry_read_offset.
     88     * Each increment of urb_entry_read_offset represents a 256-bit value, so
     89     * it counts for two 128-bit VUE slots.
     90     */
     91    int source_attr = slot - 2 * urb_entry_read_offset;
     92    assert(source_attr >= 0 && source_attr < 32);
     93 
     94    /* If we are doing two-sided color, and the VUE slot following this one
     95     * represents a back-facing color, then we need to instruct the SF unit to
     96     * do back-facing swizzling.
     97     */
     98    bool swizzling = two_side_color &&
     99       ((vue_map->slot_to_vert_result[slot] == VERT_RESULT_COL0 &&
    100         vue_map->slot_to_vert_result[slot+1] == VERT_RESULT_BFC0) ||
    101        (vue_map->slot_to_vert_result[slot] == VERT_RESULT_COL1 &&
    102         vue_map->slot_to_vert_result[slot+1] == VERT_RESULT_BFC1));
    103 
    104    /* Update max_source_attr.  If swizzling, the SF will read this slot + 1. */
    105    if (*max_source_attr < source_attr + swizzling)
    106       *max_source_attr = source_attr + swizzling;
    107 
    108    if (swizzling) {
    109       return source_attr |
    110          (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
    111    }
    112 
    113    return source_attr;
    114 }
    115 
    116 static void
    117 upload_sf_state(struct brw_context *brw)
    118 {
    119    struct intel_context *intel = &brw->intel;
    120    struct gl_context *ctx = &intel->ctx;
    121    /* BRW_NEW_FRAGMENT_PROGRAM */
    122    uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
    123    /* _NEW_LIGHT */
    124    bool shade_model_flat = ctx->Light.ShadeModel == GL_FLAT;
    125    uint32_t dw1, dw2, dw3, dw4, dw16, dw17;
    126    int i;
    127    /* _NEW_BUFFER */
    128    bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
    129    bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
    130 
    131    int attr = 0, input_index = 0;
    132    int urb_entry_read_offset = 1;
    133    float point_size;
    134    uint16_t attr_overrides[FRAG_ATTRIB_MAX];
    135    uint32_t point_sprite_origin;
    136 
    137    dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;
    138 
    139    dw2 = GEN6_SF_STATISTICS_ENABLE |
    140          GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
    141 
    142    dw3 = 0;
    143    dw4 = 0;
    144    dw16 = 0;
    145    dw17 = 0;
    146 
    147    /* _NEW_POLYGON */
    148    if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
    149       dw2 |= GEN6_SF_WINDING_CCW;
    150 
    151    if (ctx->Polygon.OffsetFill)
    152        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
    153 
    154    if (ctx->Polygon.OffsetLine)
    155        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
    156 
    157    if (ctx->Polygon.OffsetPoint)
    158        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
    159 
    160    switch (ctx->Polygon.FrontMode) {
    161    case GL_FILL:
    162        dw2 |= GEN6_SF_FRONT_SOLID;
    163        break;
    164 
    165    case GL_LINE:
    166        dw2 |= GEN6_SF_FRONT_WIREFRAME;
    167        break;
    168 
    169    case GL_POINT:
    170        dw2 |= GEN6_SF_FRONT_POINT;
    171        break;
    172 
    173    default:
    174        assert(0);
    175        break;
    176    }
    177 
    178    switch (ctx->Polygon.BackMode) {
    179    case GL_FILL:
    180        dw2 |= GEN6_SF_BACK_SOLID;
    181        break;
    182 
    183    case GL_LINE:
    184        dw2 |= GEN6_SF_BACK_WIREFRAME;
    185        break;
    186 
    187    case GL_POINT:
    188        dw2 |= GEN6_SF_BACK_POINT;
    189        break;
    190 
    191    default:
    192        assert(0);
    193        break;
    194    }
    195 
    196    /* _NEW_SCISSOR */
    197    if (ctx->Scissor.Enabled)
    198       dw3 |= GEN6_SF_SCISSOR_ENABLE;
    199 
    200    /* _NEW_POLYGON */
    201    if (ctx->Polygon.CullFlag) {
    202       switch (ctx->Polygon.CullFaceMode) {
    203       case GL_FRONT:
    204 	 dw3 |= GEN6_SF_CULL_FRONT;
    205 	 break;
    206       case GL_BACK:
    207 	 dw3 |= GEN6_SF_CULL_BACK;
    208 	 break;
    209       case GL_FRONT_AND_BACK:
    210 	 dw3 |= GEN6_SF_CULL_BOTH;
    211 	 break;
    212       default:
    213 	 assert(0);
    214 	 break;
    215       }
    216    } else {
    217       dw3 |= GEN6_SF_CULL_NONE;
    218    }
    219 
    220    /* _NEW_LINE */
    221    {
    222       uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
    223       /* TODO: line width of 0 is not allowed when MSAA enabled */
    224       if (line_width_u3_7 == 0)
    225          line_width_u3_7 = 1;
    226       dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
    227    }
    228    if (ctx->Line.SmoothFlag) {
    229       dw3 |= GEN6_SF_LINE_AA_ENABLE;
    230       dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
    231       dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
    232    }
    233    /* _NEW_MULTISAMPLE */
    234    if (multisampled_fbo && ctx->Multisample.Enabled)
    235       dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
    236 
    237    /* _NEW_PROGRAM | _NEW_POINT */
    238    if (!(ctx->VertexProgram.PointSizeEnabled ||
    239 	 ctx->Point._Attenuated))
    240       dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
    241 
    242    /* Clamp to ARB_point_parameters user limits */
    243    point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
    244 
    245    /* Clamp to the hardware limits and convert to fixed point */
    246    dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
    247 
    248    /*
    249     * Window coordinates in an FBO are inverted, which means point
    250     * sprite origin must be inverted, too.
    251     */
    252    if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
    253       point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
    254    } else {
    255       point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
    256    }
    257    dw1 |= point_sprite_origin;
    258 
    259    /* _NEW_LIGHT */
    260    if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
    261       dw4 |=
    262 	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
    263 	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
    264 	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
    265    } else {
    266       dw4 |=
    267 	 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
    268    }
    269 
    270    /* Create the mapping from the FS inputs we produce to the VS outputs
    271     * they source from.
    272     */
    273    uint32_t max_source_attr = 0;
    274    for (; attr < FRAG_ATTRIB_MAX; attr++) {
    275       enum glsl_interp_qualifier interp_qualifier =
    276          brw->fragment_program->InterpQualifier[attr];
    277       bool is_gl_Color = attr == FRAG_ATTRIB_COL0 || attr == FRAG_ATTRIB_COL1;
    278 
    279       if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)))
    280 	 continue;
    281 
    282       /* _NEW_POINT */
    283       if (ctx->Point.PointSprite &&
    284 	  (attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7) &&
    285 	  ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) {
    286 	 dw16 |= (1 << input_index);
    287       }
    288 
    289       if (attr == FRAG_ATTRIB_PNTC)
    290 	 dw16 |= (1 << input_index);
    291 
    292       /* flat shading */
    293       if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
    294           (shade_model_flat && is_gl_Color &&
    295            interp_qualifier == INTERP_QUALIFIER_NONE))
    296          dw17 |= (1 << input_index);
    297 
    298       /* The hardware can only do the overrides on 16 overrides at a
    299        * time, and the other up to 16 have to be lined up so that the
    300        * input index = the output index.  We'll need to do some
    301        * tweaking to make sure that's the case.
    302        */
    303       assert(input_index < 16 || attr == input_index);
    304 
    305       /* CACHE_NEW_VS_PROG | _NEW_LIGHT | _NEW_PROGRAM */
    306       attr_overrides[input_index++] =
    307          get_attr_override(&brw->vs.prog_data->vue_map,
    308 			   urb_entry_read_offset, attr,
    309                            ctx->VertexProgram._TwoSideEnabled,
    310                            &max_source_attr);
    311    }
    312 
    313    for (; input_index < FRAG_ATTRIB_MAX; input_index++)
    314       attr_overrides[input_index] = 0;
    315 
    316    /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
    317     * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
    318     *
    319     * "This field should be set to the minimum length required to read the
    320     *  maximum source attribute.  The maximum source attribute is indicated
    321     *  by the maximum value of the enabled Attribute # Source Attribute if
    322     *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    323     *  enable is not set.
    324     *  read_length = ceiling((max_source_attr + 1) / 2)
    325     *
    326     *  [errata] Corruption/Hang possible if length programmed larger than
    327     *  recommended"
    328     */
    329    uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
    330       dw1 |= urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
    331              urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
    332 
    333    BEGIN_BATCH(20);
    334    OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
    335    OUT_BATCH(dw1);
    336    OUT_BATCH(dw2);
    337    OUT_BATCH(dw3);
    338    OUT_BATCH(dw4);
    339    OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
    340    OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
    341    OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
    342    for (i = 0; i < 8; i++) {
    343       OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
    344    }
    345    OUT_BATCH(dw16); /* point sprite texcoord bitmask */
    346    OUT_BATCH(dw17); /* constant interp bitmask */
    347    OUT_BATCH(0); /* wrapshortest enables 0-7 */
    348    OUT_BATCH(0); /* wrapshortest enables 8-15 */
    349    ADVANCE_BATCH();
    350 }
    351 
    352 const struct brw_tracked_state gen6_sf_state = {
    353    .dirty = {
    354       .mesa  = (_NEW_LIGHT |
    355 		_NEW_PROGRAM |
    356 		_NEW_POLYGON |
    357 		_NEW_LINE |
    358 		_NEW_SCISSOR |
    359 		_NEW_BUFFERS |
    360 		_NEW_POINT |
    361                 _NEW_MULTISAMPLE),
    362       .brw   = (BRW_NEW_CONTEXT |
    363 		BRW_NEW_FRAGMENT_PROGRAM),
    364       .cache = CACHE_NEW_VS_PROG
    365    },
    366    .emit = upload_sf_state,
    367 };
    368