/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <sys/errno.h>

#include "main/context.h"
#include "main/condrender.h"
#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
#include "main/framebuffer.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "drivers/common/meta.h"
#include "util/bitscan.h"

#include "brw_blorp.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_vs.h"

#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_buffer_objects.h"

#define FILE_DEBUG_FLAG DEBUG_PRIMS


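/* Map each GL primitive type to the "reduced" primitive class (points,
 * lines, or triangles) tracked in brw->reduced_primitive.
 */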
static const GLenum reduced_prim[GL_POLYGON+1] = {
   [GL_POINTS] = GL_POINTS,
   [GL_LINES] = GL_LINES,
   [GL_LINE_LOOP] = GL_LINES,
   [GL_LINE_STRIP] = GL_LINES,
   [GL_TRIANGLES] = GL_TRIANGLES,
   [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
   [GL_TRIANGLE_FAN] = GL_TRIANGLES,
   [GL_QUADS] = GL_TRIANGLES,
   [GL_QUAD_STRIP] = GL_TRIANGLES,
   [GL_POLYGON] = GL_TRIANGLES
};

/* When the primitive changes, set a state bit and re-validate.  This is
 * not the nicest approach; we would rather make all programs immune to
 * the active primitive (i.e. cope with all possibilities), but that may
 * not be realistic.
 */
static void
brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);

   DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));

   /* Slight optimization to avoid the GS program when not needed:
    */
   if (prim->mode == GL_QUAD_STRIP &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL)
      hw_prim = _3DPRIM_TRISTRIP;

   if (prim->mode == GL_QUADS && prim->count == 4 &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL) {
      hw_prim = _3DPRIM_TRIFAN;
   }

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;

      if (reduced_prim[prim->mode] != brw->reduced_primitive) {
         brw->reduced_primitive = reduced_prim[prim->mode];
         brw->ctx.NewDriverState |= BRW_NEW_REDUCED_PRIMITIVE;
      }
   }
}

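/* Gen6+ equivalent of brw_set_prim(): GL_PATCHES becomes a patchlist whose
 * size comes from the current patch_vertices state; all other modes use the
 * standard GL-to-hardware primitive mapping.
 */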
static void
gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
{
   const struct gl_context *ctx = &brw->ctx;
   uint32_t hw_prim;

   DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));

   if (prim->mode == GL_PATCHES) {
      hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices);
   } else {
      hw_prim = get_hw_prim_for_gl_prim(prim->mode);
   }

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;
      if (prim->mode == GL_PATCHES)
         brw->ctx.NewDriverState |= BRW_NEW_PATCH_PRIMITIVE;
   }
}


/**
 * The hardware is capable of removing dangling vertices on its own; however,
 * prior to Gen6, we sometimes convert quads into trifans (and quad strips
 * into tristrips), since pre-Gen6 hardware requires a GS to render quads.
 * This function manually trims dangling vertices from a draw call involving
 * quads so that those dangling vertices won't get drawn when we convert to
 * trifans/tristrips.
 */
static GLuint
trim(GLenum prim, GLuint length)
{
   if (prim == GL_QUAD_STRIP)
      return length > 3 ? (length - length % 2) : 0;
   else if (prim == GL_QUADS)
      return length - length % 4;
   else
      return length;
}


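/**
 * Emit a 3DPRIMITIVE command for the given primitive.
 *
 * For indirect draws and transform feedback draws, the draw parameters are
 * loaded into the 3DPRIM_* registers from a buffer object rather than being
 * written inline into the command.
 */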
static void
brw_emit_prim(struct brw_context *brw,
              const struct _mesa_prim *prim,
              uint32_t hw_prim,
              struct brw_transform_feedback_object *xfb_obj,
              unsigned stream)
{
   int verts_per_instance;
   int vertex_access_type;
   int indirect_flag;

   DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode),
       prim->start, prim->count);

   int start_vertex_location = prim->start;
   int base_vertex_location = prim->basevertex;

   if (prim->indexed) {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (brw->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0 && !prim->is_indirect && !xfb_obj)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the GPU besides the draw code.
    */
   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);

   /* If indirect, emit a bunch of loads from the indirect BO. */
   if (xfb_obj) {
      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT,
                            xfb_obj->prim_count_bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            stream * sizeof(uint32_t));
      BEGIN_BATCH(9);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2));
      OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT);
      OUT_BATCH(prim->num_instances);
      OUT_BATCH(GEN7_3DPRIM_START_VERTEX);
      OUT_BATCH(0);
      OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
      OUT_BATCH(0);
      OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else if (prim->is_indirect) {
      struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
            intel_buffer_object(indirect_buffer),
            prim->indirect_offset, 5 * sizeof(GLuint));

      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

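      /* The indirect buffer follows the GL indirect draw command layouts:
       * DrawElementsIndirectCommand is { count, instanceCount, firstIndex,
       * baseVertex, baseInstance } and DrawArraysIndirectCommand is
       * { count, instanceCount, first, baseInstance }, each field a GLuint,
       * hence the 0/4/8/12/16 byte offsets below.
       */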
      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 0);
      brw_load_register_mem(brw, GEN7_3DPRIM_INSTANCE_COUNT, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 4);

      brw_load_register_mem(brw, GEN7_3DPRIM_START_VERTEX, bo,
                            I915_GEM_DOMAIN_VERTEX, 0,
                            prim->indirect_offset + 8);
      if (prim->indexed) {
         brw_load_register_mem(brw, GEN7_3DPRIM_BASE_VERTEX, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 12);
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 16);
      } else {
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               I915_GEM_DOMAIN_VERTEX, 0,
                               prim->indirect_offset + 12);
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   } else {
      indirect_flag = 0;
   }

   BEGIN_BATCH(brw->gen >= 7 ? 7 : 6);

   if (brw->gen >= 7) {
      const int predicate_enable =
         (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
         ? GEN7_3DPRIM_PREDICATE_ENABLE : 0;

      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
      OUT_BATCH(hw_prim | vertex_access_type);
   } else {
      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                vertex_access_type);
   }
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);
}


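/**
 * Release the old vertex buffer references, point each input at its new
 * vertex array, and (on pre-Haswell hardware) recompute the per-attribute
 * workaround flags for vertex formats the hardware can't fetch natively.
 */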
static void
brw_merge_inputs(struct brw_context *brw,
                 const struct gl_vertex_array *arrays[])
{
   const struct gl_context *ctx = &brw->ctx;
   GLuint i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      drm_intel_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
      brw->vb.inputs[i].buffer = -1;
      brw->vb.inputs[i].glarray = arrays[i];
   }

   if (brw->gen < 8 && !brw->is_haswell) {
      uint64_t mask = ctx->VertexProgram._Current->info.inputs_read;
      /* Prior to Haswell, the hardware can't natively support GL_FIXED or
       * 2_10_10_10_REV vertex formats.  Set appropriate workaround flags.
       */
      while (mask) {
         uint8_t wa_flags = 0;

         i = u_bit_scan64(&mask);

         switch (brw->vb.inputs[i].glarray->Type) {

         case GL_FIXED:
            wa_flags = brw->vb.inputs[i].glarray->Size;
            break;

         case GL_INT_2_10_10_10_REV:
            wa_flags |= BRW_ATTRIB_WA_SIGN;
            /* fall through */

         case GL_UNSIGNED_INT_2_10_10_10_REV:
            if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
               wa_flags |= BRW_ATTRIB_WA_BGRA;

            if (brw->vb.inputs[i].glarray->Normalized)
               wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
            else if (!brw->vb.inputs[i].glarray->Integer)
               wa_flags |= BRW_ATTRIB_WA_SCALE;

            break;
         }

         if (brw->vb.attrib_wa_flags[i] != wa_flags) {
            brw->vb.attrib_wa_flags[i] = wa_flags;
            brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS;
         }
      }
   }
}

/**
 * \brief Call this after drawing to mark which buffers need resolving
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 *
 * Also mark any render targets which will be textured as needing a render
 * cache flush.
 */
static void
brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   struct intel_renderbuffer *front_irb = NULL;
   struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];

   if (_mesa_is_front_buffer_drawing(fb))
      front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);

   if (front_irb)
      front_irb->need_downsample = true;
   if (back_irb)
      back_irb->need_downsample = true;
   if (depth_irb && brw_depth_writes_enabled(brw)) {
      intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
      brw_render_cache_set_add_bo(brw, depth_irb->mt->bo);
   }

   if (ctx->Extensions.ARB_stencil_texturing &&
       stencil_irb && ctx->Stencil._WriteEnabled) {
      brw_render_cache_set_add_bo(brw, stencil_irb->mt->bo);
   }

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (!irb)
         continue;

      brw_render_cache_set_add_bo(brw, irb->mt->bo);
      intel_miptree_used_for_rendering(
         brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count);
   }
}

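/**
 * \brief Call this before drawing to resolve auxiliary buffers as needed.
 *
 * On Gen9+, fast-cleared but non-compressed color buffers that will be used
 * for layered rendering must have their fast clear color resolved first,
 * since surface state can carry only a single fast clear value.
 */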
static void
brw_predraw_set_aux_buffers(struct brw_context *brw)
{
   if (brw->gen < 9)
      return;

   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (!irb) {
         continue;
      }

      /* For layered rendering, non-compressed fast-cleared buffers need to
       * be resolved.  Surface state can carry only one fast color clear
       * value, while each layer may have its own fast clear color value.
       * For compressed buffers, the color value is available in the color
       * buffer.
       */
      if (irb->layer_count > 1 &&
          !(irb->mt->aux_disable & INTEL_AUX_DISABLE_CCS) &&
          !intel_miptree_is_lossless_compressed(brw, irb->mt)) {
         assert(brw->gen >= 8);

         intel_miptree_resolve_color(brw, irb->mt, irb->mt_level,
                                     irb->mt_layer, irb->layer_count, 0);
      }
   }
}

/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_vertex_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   bool index_bounds_valid,
                   GLuint min_index,
                   GLuint max_index,
                   struct brw_transform_feedback_object *xfb_obj,
                   unsigned stream,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      util_last_bit(ctx->FragmentProgram._Current->SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      util_last_bit(ctx->GeometryProgram._Current->SamplersUsed) : 0;
   brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ?
      util_last_bit(ctx->TessEvalProgram._Current->SamplersUsed) : 0;
   brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ?
      util_last_bit(ctx->TessCtrlProgram._Current->SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      util_last_bit(ctx->VertexProgram._Current->SamplersUsed);

   intel_prepare_render(brw);
   brw_predraw_set_aux_buffers(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.index_bounds_valid = index_bounds_valid;
   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;
      const int sampler_state_size = 16;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances ||
          brw->basevertex != prims[i].basevertex ||
          brw->baseinstance != prims[i].base_instance) {
         brw->num_instances = prims[i].num_instances;
         brw->basevertex = prims[i].basevertex;
         brw->baseinstance = prims[i].base_instance;
         if (i > 0) { /* For i == 0 we just did this before the loop */
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
            brw_merge_inputs(brw, arrays);
         }
      }

      /* Determine if we need to flag BRW_NEW_VERTICES for updating the
       * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we
       * always flag if the shader uses one of the values. For direct draws,
       * we only flag if the values change.
       */
      const int new_basevertex =
         prims[i].indexed ? prims[i].basevertex : prims[i].start;
      const int new_baseinstance = prims[i].base_instance;
      const struct brw_vs_prog_data *vs_prog_data =
         brw_vs_prog_data(brw->vs.base.prog_data);
      if (i > 0) {
         const bool uses_draw_parameters =
            vs_prog_data->uses_basevertex ||
            vs_prog_data->uses_baseinstance;

         if ((uses_draw_parameters && prims[i].is_indirect) ||
             (vs_prog_data->uses_basevertex &&
              brw->draw.params.gl_basevertex != new_basevertex) ||
             (vs_prog_data->uses_baseinstance &&
              brw->draw.params.gl_baseinstance != new_baseinstance))
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
      }

      brw->draw.params.gl_basevertex = new_basevertex;
      brw->draw.params.gl_baseinstance = new_baseinstance;
      drm_intel_bo_unreference(brw->draw.draw_params_bo);

      if (prims[i].is_indirect) {
         /* Point draw_params_bo at the indirect buffer. */
         brw->draw.draw_params_bo =
            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
         drm_intel_bo_reference(brw->draw.draw_params_bo);
         brw->draw.draw_params_offset =
            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
      } else {
         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
          * has to upload gl_BaseVertex and such if they're needed.
          */
         brw->draw.draw_params_bo = NULL;
         brw->draw.draw_params_offset = 0;
      }

      /* gl_DrawID always needs its own vertex buffer since it's not part of
       * the indirect parameter buffer. If the program uses gl_DrawID we need
       * to flag BRW_NEW_VERTICES. For the first iteration, we don't have
       * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
       * the loop.
       */
      brw->draw.gl_drawid = prims[i].draw_id;
      drm_intel_bo_unreference(brw->draw.draw_id_bo);
      brw->draw.draw_id_bo = NULL;
      if (i > 0 && vs_prog_data->uses_drawid)
         brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

      if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
      else
         gen6_set_prim(brw, &prims[i]);

retry:

      /* Note that, before the loop, brw->ctx.NewDriverState was set to
       * non-zero; the only state updated within the loop outside of this
       * block is by *_set_prim or intel_batchbuffer_flush(), which only
       * affects brw->ctx.NewDriverState.
       */
      if (brw->ctx.NewDriverState) {
         brw->no_batch_wrap = true;
         brw_upload_render_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(brw);
            intel_batchbuffer_flush(brw);
            fail_next = true;
            goto retry;
         } else {
            int ret = intel_batchbuffer_flush(brw);
            WARN_ONCE(ret == -ENOSPC,
                      "i965: Single primitive emit exceeded "
                      "available aperture space\n");
         }
      }

      /* Now that we know we haven't run out of aperture space, we can safely
       * reset the dirty bits.
       */
      if (brw->ctx.NewDriverState)
         brw_render_state_finished(brw);
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_program_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return;
}

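/**
 * The driver's draw entry point, registered with the vbo module.  Handles
 * conditional rendering, primitive restart, and the select/feedback swrast
 * fallback, computes index bounds if they aren't known, and then hands the
 * draw to brw_try_draw_prims().
 */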
void
brw_draw_prims(struct gl_context *ctx,
               const struct _mesa_prim *prims,
               GLuint nr_prims,
               const struct _mesa_index_buffer *ib,
               GLboolean index_bounds_valid,
               GLuint min_index,
               GLuint max_index,
               struct gl_transform_feedback_object *gl_xfb_obj,
               unsigned stream,
               struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
   struct brw_transform_feedback_object *xfb_obj =
      (struct brw_transform_feedback_object *) gl_xfb_obj;

   if (!brw_check_conditional_render(brw))
      return;

   /* Handle primitive restart if needed */
   if (brw_handle_primitive_restart(ctx, prims, nr_prims, ib, indirect)) {
      /* The draw was handled, so we can exit now */
      return;
   }

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
    */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_enum_to_string(ctx->RenderMode));
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, prims, nr_prims, ib,
                      index_bounds_valid, min_index, max_index, NULL, 0, NULL);
      return;
   }

   /* If we're going to have to upload any of the user's vertex arrays, then
    * get the minimum and maximum of their index buffer so we know what range
    * to upload.
    */
   if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
      perf_debug("Scanning index buffer to compute index buffer bounds.  "
                 "Use glDrawRangeElements() to avoid this.\n");
      vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
      index_bounds_valid = true;
   }

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, index_bounds_valid,
                      min_index, max_index, xfb_obj, stream, indirect);
}

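/* Hook the draw module into the context's vbo machinery and reset the
 * vertex buffer state.
 */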
void
brw_draw_init(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct vbo_context *vbo = vbo_context(ctx);

   /* Register our drawing function:
    */
   vbo->draw_prims = brw_draw_prims;

   for (int i = 0; i < VERT_ATTRIB_MAX; i++)
      brw->vb.inputs[i].buffer = -1;
   brw->vb.nr_buffers = 0;
   brw->vb.nr_enabled = 0;
}

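/* Release the buffer object references held by the draw module. */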
void
brw_draw_destroy(struct brw_context *brw)
{
   unsigned i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      drm_intel_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < brw->vb.nr_enabled; i++) {
      brw->vb.enabled[i]->buffer = -1;
   }
   brw->vb.nr_enabled = 0;

   drm_intel_bo_unreference(brw->ib.bo);
   brw->ib.bo = NULL;
}