Home | History | Annotate | Download | only in i965
      1 /*
      2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
      3  Intel funded Tungsten Graphics to
      4  develop this 3D driver.
      5 
      6  Permission is hereby granted, free of charge, to any person obtaining
      7  a copy of this software and associated documentation files (the
      8  "Software"), to deal in the Software without restriction, including
      9  without limitation the rights to use, copy, modify, merge, publish,
     10  distribute, sublicense, and/or sell copies of the Software, and to
     11  permit persons to whom the Software is furnished to do so, subject to
     12  the following conditions:
     13 
     14  The above copyright notice and this permission notice (including the
     15  next paragraph) shall be included in all copies or substantial
     16  portions of the Software.
     17 
     18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25 
     26  **********************************************************************/
     27  /*
     28   * Authors:
     29   *   Keith Whitwell <keithw (at) vmware.com>
     30   */
     31 
     32 
     33 
     34 #include "main/mtypes.h"
     35 #include "main/macros.h"
     36 #include "main/fbobject.h"
     37 #include "main/viewport.h"
     38 #include "brw_context.h"
     39 #include "brw_state.h"
     40 #include "brw_defines.h"
     41 #include "brw_sf.h"
     42 
     43 static void upload_sf_vp(struct brw_context *brw)
     44 {
     45    struct gl_context *ctx = &brw->ctx;
     46    struct brw_sf_viewport *sfv;
     47    GLfloat y_scale, y_bias;
     48    float scale[3], translate[3];
     49    const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
     50 
     51    sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
     52 			 sizeof(*sfv), 32, &brw->sf.vp_offset);
     53    memset(sfv, 0, sizeof(*sfv));
     54 
     55    /* Accessing the fields Width and Height of gl_framebuffer to produce the
     56     * values to program the viewport and scissor is fine as long as the
     57     * gl_framebuffer has atleast one attachment.
     58     */
     59    assert(ctx->DrawBuffer->_HasAttachments);
     60 
     61    if (render_to_fbo) {
     62       y_scale = 1.0;
     63       y_bias = 0;
     64    }
     65    else {
     66       y_scale = -1.0;
     67       y_bias = ctx->DrawBuffer->Height;
     68    }
     69 
     70    /* _NEW_VIEWPORT */
     71 
     72    _mesa_get_viewport_xform(ctx, 0, scale, translate);
     73    sfv->viewport.m00 = scale[0];
     74    sfv->viewport.m11 = scale[1] * y_scale;
     75    sfv->viewport.m22 = scale[2];
     76    sfv->viewport.m30 = translate[0];
     77    sfv->viewport.m31 = translate[1] * y_scale + y_bias;
     78    sfv->viewport.m32 = translate[2];
     79 
     80    /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
     81     * for DrawBuffer->_[XY]{min,max}
     82     */
     83 
     84    /* The scissor only needs to handle the intersection of drawable
     85     * and scissor rect, since there are no longer cliprects for shared
     86     * buffers with DRI2.
     87     *
     88     * Note that the hardware's coordinates are inclusive, while Mesa's min is
     89     * inclusive but max is exclusive.
     90     */
     91 
     92    if (ctx->DrawBuffer->_Xmin == ctx->DrawBuffer->_Xmax ||
     93        ctx->DrawBuffer->_Ymin == ctx->DrawBuffer->_Ymax) {
     94       /* If the scissor was out of bounds and got clamped to 0
     95        * width/height at the bounds, the subtraction of 1 from
     96        * maximums could produce a negative number and thus not clip
     97        * anything.  Instead, just provide a min > max scissor inside
     98        * the bounds, which produces the expected no rendering.
     99        */
    100       sfv->scissor.xmin = 1;
    101       sfv->scissor.xmax = 0;
    102       sfv->scissor.ymin = 1;
    103       sfv->scissor.ymax = 0;
    104    } else if (render_to_fbo) {
    105       /* texmemory: Y=0=bottom */
    106       sfv->scissor.xmin = ctx->DrawBuffer->_Xmin;
    107       sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
    108       sfv->scissor.ymin = ctx->DrawBuffer->_Ymin;
    109       sfv->scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
    110    }
    111    else {
    112       /* memory: Y=0=top */
    113       sfv->scissor.xmin = ctx->DrawBuffer->_Xmin;
    114       sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
    115       sfv->scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
    116       sfv->scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
    117    }
    118 
    119    brw->ctx.NewDriverState |= BRW_NEW_SF_VP;
    120 }
    121 
    122 const struct brw_tracked_state brw_sf_vp = {
    123    .dirty = {
    124       .mesa  = _NEW_BUFFERS |
    125                _NEW_SCISSOR |
    126                _NEW_VIEWPORT,
    127       .brw   = BRW_NEW_BATCH |
    128                BRW_NEW_BLORP,
    129    },
    130    .emit = upload_sf_vp
    131 };
    132 
    133 static void upload_sf_unit( struct brw_context *brw )
    134 {
    135    struct gl_context *ctx = &brw->ctx;
    136    struct brw_sf_unit_state *sf;
    137    drm_intel_bo *bo = brw->batch.bo;
    138    int chipset_max_threads;
    139    bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
    140 
    141    sf = brw_state_batch(brw, AUB_TRACE_SF_STATE,
    142 			sizeof(*sf), 64, &brw->sf.state_offset);
    143 
    144    memset(sf, 0, sizeof(*sf));
    145 
    146    /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_SF_PROG_DATA */
    147    sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
    148    sf->thread0.kernel_start_pointer =
    149       brw_program_reloc(brw,
    150 			brw->sf.state_offset +
    151 			offsetof(struct brw_sf_unit_state, thread0),
    152 			brw->sf.prog_offset +
    153 			(sf->thread0.grf_reg_count << 1)) >> 6;
    154 
    155    sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    156 
    157    sf->thread3.dispatch_grf_start_reg = 3;
    158    sf->thread3.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
    159 
    160    /* BRW_NEW_SF_PROG_DATA */
    161    sf->thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
    162 
    163    /* BRW_NEW_URB_FENCE */
    164    sf->thread4.nr_urb_entries = brw->urb.nr_sf_entries;
    165    sf->thread4.urb_entry_allocation_size = brw->urb.sfsize - 1;
    166 
    167    /* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or
    168     * 48 (Ironlake) threads.
    169     */
    170    if (brw->gen == 5)
    171       chipset_max_threads = 48;
    172    else
    173       chipset_max_threads = 24;
    174 
    175    /* BRW_NEW_URB_FENCE */
    176    sf->thread4.max_threads = MIN2(chipset_max_threads,
    177 				  brw->urb.nr_sf_entries) - 1;
    178 
    179    if (unlikely(INTEL_DEBUG & DEBUG_STATS))
    180       sf->thread4.stats_enable = 1;
    181 
    182    /* BRW_NEW_SF_VP */
    183    sf->sf5.sf_viewport_state_offset = (brw->batch.bo->offset64 +
    184 				       brw->sf.vp_offset) >> 5; /* reloc */
    185 
    186    sf->sf5.viewport_transform = 1;
    187 
    188    /* _NEW_SCISSOR */
    189    if (ctx->Scissor.EnableFlags)
    190       sf->sf6.scissor = 1;
    191 
    192    /* _NEW_POLYGON */
    193    if (ctx->Polygon._FrontBit)
    194       sf->sf5.front_winding = BRW_FRONTWINDING_CW;
    195    else
    196       sf->sf5.front_winding = BRW_FRONTWINDING_CCW;
    197 
    198    /* _NEW_BUFFERS
    199     * The viewport is inverted for rendering to a FBO, and that inverts
    200     * polygon front/back orientation.
    201     */
    202    sf->sf5.front_winding ^= render_to_fbo;
    203 
    204    /* _NEW_POLYGON */
    205    switch (ctx->Polygon.CullFlag ? ctx->Polygon.CullFaceMode : GL_NONE) {
    206    case GL_FRONT:
    207       sf->sf6.cull_mode = BRW_CULLMODE_FRONT;
    208       break;
    209    case GL_BACK:
    210       sf->sf6.cull_mode = BRW_CULLMODE_BACK;
    211       break;
    212    case GL_FRONT_AND_BACK:
    213       sf->sf6.cull_mode = BRW_CULLMODE_BOTH;
    214       break;
    215    case GL_NONE:
    216       sf->sf6.cull_mode = BRW_CULLMODE_NONE;
    217       break;
    218    default:
    219       unreachable("not reached");
    220    }
    221 
    222    /* _NEW_LINE */
    223    sf->sf6.line_width =
    224       CLAMP(ctx->Line.Width, 1.0f, ctx->Const.MaxLineWidth) * (1<<1);
    225 
    226    sf->sf6.line_endcap_aa_region_width = 1;
    227    if (ctx->Line.SmoothFlag)
    228       sf->sf6.aa_enable = 1;
    229    else if (sf->sf6.line_width <= 0x2)
    230        sf->sf6.line_width = 0;
    231 
    232    /* _NEW_BUFFERS */
    233    if (!render_to_fbo) {
    234       /* Rendering to an OpenGL window */
    235       sf->sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
    236    }
    237    else {
    238       /* If rendering to an FBO, the pixel coordinate system is
    239        * inverted with respect to the normal OpenGL coordinate
    240        * system, so BRW_RASTRULE_LOWER_RIGHT is correct.
    241        * But this value is listed as "Reserved, but not seen as useful"
    242        * in Intel documentation (page 212, "Point Rasterization Rule",
    243        * section 7.4 "SF Pipeline State Summary", of document
    244        * "Intel 965 Express Chipset Family and Intel G35 Express
    245        * Chipset Graphics Controller Programmer's Reference Manual,
    246        * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
    247        * available at
    248        *     https://01.org/linuxgraphics/documentation/hardware-specification-prms
    249        * at the time of this writing).
    250        *
    251        * It does work on at least some devices, if not all;
    252        * if devices that don't support it can be identified,
    253        * the likely failure case is that points are rasterized
    254        * incorrectly, which is no worse than occurs without
    255        * the value, so we're using it here.
    256        */
    257       sf->sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
    258    }
    259    /* XXX clamp max depends on AA vs. non-AA */
    260 
    261    /* _NEW_POINT */
    262    sf->sf7.sprite_point = ctx->Point.PointSprite;
    263    sf->sf7.point_size = CLAMP(rintf(CLAMP(ctx->Point.Size,
    264                                           ctx->Point.MinSize,
    265                                           ctx->Point.MaxSize)), 1.0f, 255.0f) *
    266                         (1<<3);
    267    /* _NEW_PROGRAM | _NEW_POINT */
    268    sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled ||
    269 				    ctx->Point._Attenuated);
    270    sf->sf7.aa_line_distance_mode = 0;
    271 
    272    /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
    273     * _NEW_LIGHT
    274     */
    275    if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
    276       sf->sf7.trifan_pv = 2;
    277       sf->sf7.linestrip_pv = 1;
    278       sf->sf7.tristrip_pv = 2;
    279    } else {
    280       sf->sf7.trifan_pv = 1;
    281       sf->sf7.linestrip_pv = 0;
    282       sf->sf7.tristrip_pv = 0;
    283    }
    284    sf->sf7.line_last_pixel_enable = 0;
    285 
    286    /* Set bias for OpenGL rasterization rules:
    287     */
    288    sf->sf6.dest_org_vbias = 0x8;
    289    sf->sf6.dest_org_hbias = 0x8;
    290 
    291    /* STATE_PREFETCH command description describes this state as being
    292     * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
    293     */
    294 
    295    /* Emit SF viewport relocation */
    296    drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
    297 				offsetof(struct brw_sf_unit_state, sf5)),
    298 			   brw->batch.bo, (brw->sf.vp_offset |
    299 					     sf->sf5.front_winding |
    300 					     (sf->sf5.viewport_transform << 1)),
    301 			   I915_GEM_DOMAIN_INSTRUCTION, 0);
    302 
    303    brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
    304 }
    305 
    306 const struct brw_tracked_state brw_sf_unit = {
    307    .dirty = {
    308       .mesa  = _NEW_BUFFERS |
    309                _NEW_LIGHT |
    310                _NEW_LINE |
    311                _NEW_POINT |
    312                _NEW_POLYGON |
    313                _NEW_PROGRAM |
    314                _NEW_SCISSOR,
    315       .brw   = BRW_NEW_BATCH |
    316                BRW_NEW_BLORP |
    317                BRW_NEW_PROGRAM_CACHE |
    318                BRW_NEW_SF_PROG_DATA |
    319                BRW_NEW_SF_VP |
    320                BRW_NEW_URB_FENCE,
    321    },
    322    .emit = upload_sf_unit,
    323 };
    324