/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <sys/errno.h>

#include "main/blend.h"
#include "main/context.h"
#include "main/condrender.h"
#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
#include "main/framebuffer.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "drivers/common/meta.h"
#include "util/bitscan.h"
#include "util/bitset.h"

#include "brw_blorp.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "compiler/brw_eu_defines.h"
#include "brw_context.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_buffer_objects.h"

#define FILE_DEBUG_FLAG DEBUG_PRIMS


static const GLenum reduced_prim[GL_POLYGON+1] = {
   [GL_POINTS] = GL_POINTS,
   [GL_LINES] = GL_LINES,
   [GL_LINE_LOOP] = GL_LINES,
   [GL_LINE_STRIP] = GL_LINES,
   [GL_TRIANGLES] = GL_TRIANGLES,
   [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
   [GL_TRIANGLE_FAN] = GL_TRIANGLES,
   [GL_QUADS] = GL_TRIANGLES,
   [GL_QUAD_STRIP] = GL_TRIANGLES,
   [GL_POLYGON] = GL_TRIANGLES
};

/* When the primitive changes, set a state bit and re-validate.  This is
 * not the nicest solution; we would rather make all the programs immune
 * to the active primitive (i.e. able to cope with every possibility),
 * but that may not be realistic.
 */
static void
brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);

   DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));

   /* Slight optimization to avoid the GS program when not needed:
    */
   if (prim->mode == GL_QUAD_STRIP &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL)
      hw_prim = _3DPRIM_TRISTRIP;

   if (prim->mode == GL_QUADS && prim->count == 4 &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL) {
      hw_prim = _3DPRIM_TRIFAN;
   }

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;

      if (reduced_prim[prim->mode] != brw->reduced_primitive) {
         brw->reduced_primitive = reduced_prim[prim->mode];
         brw->ctx.NewDriverState |= BRW_NEW_REDUCED_PRIMITIVE;
      }
   }
}

static void
gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
{
   const struct gl_context *ctx = &brw->ctx;
   uint32_t hw_prim;

   DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));

   if (prim->mode == GL_PATCHES) {
      hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices);
   } else {
      hw_prim = get_hw_prim_for_gl_prim(prim->mode);
   }

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;
      if (prim->mode == GL_PATCHES)
         brw->ctx.NewDriverState |= BRW_NEW_PATCH_PRIMITIVE;
   }
}
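
/* A note on the GL_PATCHES case above (illustrative, not driver code):
 * the hardware topology encodes the patch size directly, so after an
 * application calls
 *
 *    glPatchParameteri(GL_PATCH_VERTICES, 4);
 *
 * a GL_PATCHES draw reaches us with patch_vertices == 4 and is emitted
 * as _3DPRIM_PATCHLIST(4).
 */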


/**
 * The hardware is capable of removing dangling vertices on its own; however,
 * prior to Gen6, we sometimes convert quads into trifans (and quad strips
 * into tristrips), since pre-Gen6 hardware requires a GS to render quads.
 * This function manually trims dangling vertices from a draw call involving
 * quads so that those dangling vertices won't get drawn when we convert to
 * trifans/tristrips.
 */
static GLuint
trim(GLenum prim, GLuint length)
{
   if (prim == GL_QUAD_STRIP)
      return length > 3 ? (length - length % 2) : 0;
   else if (prim == GL_QUADS)
      return length - length % 4;
   else
      return length;
}
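
/* A quick sketch of the trimming arithmetic (worked examples only, not
 * driver code):
 *
 *    trim(GL_QUADS, 7)      == 4   (3 dangling vertices dropped)
 *    trim(GL_QUADS, 8)      == 8
 *    trim(GL_QUAD_STRIP, 5) == 4   (odd trailing vertex dropped)
 *    trim(GL_QUAD_STRIP, 3) == 0   (too short to form any quad)
 *    trim(GL_TRIANGLES, 7)  == 7   (non-quad primitives pass through)
 */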


static void
brw_emit_prim(struct brw_context *brw,
              const struct _mesa_prim *prim,
              uint32_t hw_prim,
              struct brw_transform_feedback_object *xfb_obj,
              unsigned stream)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   int verts_per_instance;
   int vertex_access_type;
   int indirect_flag;

   DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode),
       prim->start, prim->count);

   int start_vertex_location = prim->start;
   int base_vertex_location = prim->basevertex;

   if (prim->indexed) {
      vertex_access_type = devinfo->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = devinfo->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (devinfo->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0 && !prim->is_indirect && !xfb_obj)
      return;
   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the pipeline besides the draw code.
    */
   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);

   /* If indirect, emit a bunch of loads from the indirect BO. */
   if (xfb_obj) {
      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT,
                            xfb_obj->prim_count_bo,
                            stream * sizeof(uint32_t));
      BEGIN_BATCH(9);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2));
      OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT);
      OUT_BATCH(prim->num_instances);
      OUT_BATCH(GEN7_3DPRIM_START_VERTEX);
      OUT_BATCH(0);
      OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
      OUT_BATCH(0);
      OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else if (prim->is_indirect) {
      struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
      struct brw_bo *bo = intel_bufferobj_buffer(brw,
            intel_buffer_object(indirect_buffer),
            prim->indirect_offset, 5 * sizeof(GLuint), false);

      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT, bo,
                            prim->indirect_offset + 0);
      brw_load_register_mem(brw, GEN7_3DPRIM_INSTANCE_COUNT, bo,
                            prim->indirect_offset + 4);

      brw_load_register_mem(brw, GEN7_3DPRIM_START_VERTEX, bo,
                            prim->indirect_offset + 8);
      if (prim->indexed) {
         brw_load_register_mem(brw, GEN7_3DPRIM_BASE_VERTEX, bo,
                               prim->indirect_offset + 12);
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               prim->indirect_offset + 16);
      } else {
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                               prim->indirect_offset + 12);
         brw_load_register_imm32(brw, GEN7_3DPRIM_BASE_VERTEX, 0);
      }
   } else {
      indirect_flag = 0;
   }
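
   /* For reference, the offsets loaded above follow the indirect command
    * layouts that the GL spec defines; the register loads simply index
    * into one of these structs:
    *
    *    typedef struct {
    *       GLuint count;          // -> 3DPRIM_VERTEX_COUNT
    *       GLuint primCount;      // -> 3DPRIM_INSTANCE_COUNT
    *       GLuint first;          // -> 3DPRIM_START_VERTEX
    *       GLuint baseInstance;   // -> 3DPRIM_START_INSTANCE
    *    } DrawArraysIndirectCommand;
    *
    *    typedef struct {
    *       GLuint count;          // -> 3DPRIM_VERTEX_COUNT
    *       GLuint primCount;      // -> 3DPRIM_INSTANCE_COUNT
    *       GLuint firstIndex;     // -> 3DPRIM_START_VERTEX
    *       GLint  baseVertex;     // -> 3DPRIM_BASE_VERTEX
    *       GLuint baseInstance;   // -> 3DPRIM_START_INSTANCE
    *    } DrawElementsIndirectCommand;
    */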

   BEGIN_BATCH(devinfo->gen >= 7 ? 7 : 6);

   if (devinfo->gen >= 7) {
      const int predicate_enable =
         (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
         ? GEN7_3DPRIM_PREDICATE_ENABLE : 0;

      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
      OUT_BATCH(hw_prim | vertex_access_type);
   } else {
      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                vertex_access_type);
   }
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);
}


static void
brw_merge_inputs(struct brw_context *brw,
                 const struct gl_vertex_array *arrays[])
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;
   GLuint i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      brw_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
      brw->vb.inputs[i].buffer = -1;
      brw->vb.inputs[i].glarray = arrays[i];
   }

   if (devinfo->gen < 8 && !devinfo->is_haswell) {
      uint64_t mask = ctx->VertexProgram._Current->info.inputs_read;
      /* Prior to Haswell, the hardware can't natively support GL_FIXED or
       * 2_10_10_10_REV vertex formats.  Set appropriate workaround flags.
       */
      while (mask) {
         uint8_t wa_flags = 0;

         i = u_bit_scan64(&mask);

         switch (brw->vb.inputs[i].glarray->Type) {

         case GL_FIXED:
            wa_flags = brw->vb.inputs[i].glarray->Size;
            break;

         case GL_INT_2_10_10_10_REV:
            wa_flags |= BRW_ATTRIB_WA_SIGN;
            /* fall through */

         case GL_UNSIGNED_INT_2_10_10_10_REV:
            if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
               wa_flags |= BRW_ATTRIB_WA_BGRA;

            if (brw->vb.inputs[i].glarray->Normalized)
               wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
            else if (!brw->vb.inputs[i].glarray->Integer)
               wa_flags |= BRW_ATTRIB_WA_SCALE;

            break;
         }

         if (brw->vb.attrib_wa_flags[i] != wa_flags) {
            brw->vb.attrib_wa_flags[i] = wa_flags;
            brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS;
         }
      }
   }
}
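
/* A concrete example of the workaround flags above (a sketch, not driver
 * code): a normalized GL_INT_2_10_10_10_REV attribute with a GL_BGRA
 * format would accumulate
 *
 *    wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_BGRA |
 *               BRW_ATTRIB_WA_NORMALIZE;
 *
 * asking the VS front-end to sign-extend, swizzle, and renormalize the
 * components in place of the fixed-function vertex fetcher.
 */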

/* Disable auxiliary buffers if a renderbuffer is also bound as a texture
 * or shader image.  This causes a self-dependency, where both rendering
 * and sampling may concurrently read or write the CCS buffer, causing
 * incorrect pixels.
 */
static bool
intel_disable_rb_aux_buffer(struct brw_context *brw,
                            bool *draw_aux_buffer_disabled,
                            struct intel_mipmap_tree *tex_mt,
                            unsigned min_level, unsigned num_levels,
                            const char *usage)
{
   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
   bool found = false;

   /* We only need to worry about color compression and fast clears. */
   if (tex_mt->aux_usage != ISL_AUX_USAGE_CCS_D &&
       tex_mt->aux_usage != ISL_AUX_USAGE_CCS_E)
      return false;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      const struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb && irb->mt->bo == tex_mt->bo &&
          irb->mt_level >= min_level &&
          irb->mt_level < min_level + num_levels) {
         found = draw_aux_buffer_disabled[i] = true;
      }
   }

   if (found) {
      perf_debug("Disabling CCS because a renderbuffer is also bound %s.\n",
                 usage);
   }

   return found;
}
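
/* The classic way an application creates such a self-dependency (an
 * illustrative client-side sketch; 'fbo' and 'tex' are hypothetical
 * names):
 *
 *    glBindFramebuffer(GL_FRAMEBUFFER, fbo);
 *    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
 *                           GL_TEXTURE_2D, tex, 0);   // render to tex...
 *    glBindTexture(GL_TEXTURE_2D, tex);               // ...and sample it
 *    glDrawArrays(GL_TRIANGLES, 0, 3);
 *
 * Rendering and sampling then alias the same BO, so CCS has to be
 * disabled for the draw to keep both views coherent.
 */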

static void
mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
                           const struct gl_program *prog)
{
   if (!prog)
      return;

   unsigned mask = prog->SamplersUsed & prog->info.textures_used_by_txf;
   while (mask) {
      int s = u_bit_scan(&mask);
      BITSET_SET(used_for_txf, prog->SamplerUnits[s]);
   }
}

/**
 * \brief Resolve buffers before drawing.
 *
 * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
 * enabled depth texture, and flush the render cache for any dirty textures.
 */
void
brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
                           bool *draw_aux_buffer_disabled)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_texture_object *tex_obj;

   BITSET_DECLARE(used_for_txf, MAX_COMBINED_TEXTURE_IMAGE_UNITS);
   memset(used_for_txf, 0, sizeof(used_for_txf));
   if (rendering) {
      mark_textures_used_for_txf(used_for_txf, ctx->VertexProgram._Current);
      mark_textures_used_for_txf(used_for_txf, ctx->TessCtrlProgram._Current);
      mark_textures_used_for_txf(used_for_txf, ctx->TessEvalProgram._Current);
      mark_textures_used_for_txf(used_for_txf, ctx->GeometryProgram._Current);
      mark_textures_used_for_txf(used_for_txf, ctx->FragmentProgram._Current);
   } else {
      mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current);
   }

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i);
      enum isl_format view_format =
         translate_tex_format(brw, tex_obj->_Format, sampler->sRGBDecode);

      unsigned min_level, min_layer, num_levels, num_layers;
      if (tex_obj->base.Immutable) {
         min_level  = tex_obj->base.MinLevel;
         num_levels = MIN2(tex_obj->base.NumLevels, tex_obj->_MaxLevel + 1);
         min_layer  = tex_obj->base.MinLayer;
         num_layers = tex_obj->base.Target != GL_TEXTURE_3D ?
                      tex_obj->base.NumLayers : INTEL_REMAINING_LAYERS;
      } else {
         min_level  = tex_obj->base.BaseLevel;
         num_levels = tex_obj->_MaxLevel - tex_obj->base.BaseLevel + 1;
         min_layer  = 0;
         num_layers = INTEL_REMAINING_LAYERS;
      }

      const bool disable_aux = rendering &&
         intel_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled,
                                     tex_obj->mt, min_level, num_levels,
                                     "for sampling");

      intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
                                    min_level, num_levels,
                                    min_layer, num_layers,
                                    disable_aux);

      /* If any programs are using it with texelFetch, we may need to also do
       * a prepare with an sRGB format to ensure texelFetch works "properly".
       */
      if (BITSET_TEST(used_for_txf, i)) {
         enum isl_format txf_format =
            translate_tex_format(brw, tex_obj->_Format, GL_DECODE_EXT);
         if (txf_format != view_format) {
            intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
                                          min_level, num_levels,
                                          min_layer, num_layers,
                                          disable_aux);
         }
      }

      brw_cache_flush_for_read(brw, tex_obj->mt->bo);

      if (tex_obj->base.StencilSampling ||
          tex_obj->mt->format == MESA_FORMAT_S_UINT8) {
         intel_update_r8stencil(brw, tex_obj->mt);
      }
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_program *prog = ctx->_Shader->CurrentProgram[i];

      if (unlikely(prog && prog->info.num_images)) {
         for (unsigned j = 0; j < prog->info.num_images; j++) {
            struct gl_image_unit *u =
               &ctx->ImageUnits[prog->sh.ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               if (rendering) {
                  intel_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled,
                                              tex_obj->mt, 0, ~0,
                                              "as a shader image");
               }

               intel_miptree_prepare_image(brw, tex_obj->mt);

               brw_cache_flush_for_read(brw, tex_obj->mt->bo);
            }
         }
      }
   }
}

static void
brw_predraw_resolve_framebuffer(struct brw_context *brw,
                                bool *draw_aux_buffer_disabled)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *depth_irb;

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb && depth_irb->mt) {
      intel_miptree_prepare_depth(brw, depth_irb->mt,
                                  depth_irb->mt_level,
                                  depth_irb->mt_layer,
                                  depth_irb->layer_count);
   }

   /* Resolve color buffers for non-coherent framebuffer fetch. */
   if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
       ctx->FragmentProgram._Current &&
       ctx->FragmentProgram._Current->info.outputs_read) {
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      /* This is only used for non-coherent framebuffer fetch, so we don't
       * need to worry about CCS_E and can simply pass 'false' below.
       */
      assert(brw->screen->devinfo.gen < 9);

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         const struct intel_renderbuffer *irb =
            intel_renderbuffer(fb->_ColorDrawBuffers[i]);

         if (irb) {
            intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format,
                                          irb->mt_level, 1,
                                          irb->mt_layer, irb->layer_count,
                                          false);
         }
      }
   }

   struct gl_framebuffer *fb = ctx->DrawBuffer;
   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb == NULL || irb->mt == NULL)
         continue;

      mesa_format mesa_format =
         _mesa_get_render_format(ctx, intel_rb_format(irb));
      enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
      bool blend_enabled = ctx->Color.BlendEnabled & (1 << i);
      enum isl_aux_usage aux_usage =
         intel_miptree_render_aux_usage(brw, irb->mt, isl_format,
                                        blend_enabled,
                                        draw_aux_buffer_disabled[i]);
      if (brw->draw_aux_usage[i] != aux_usage) {
         brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
         brw->draw_aux_usage[i] = aux_usage;
      }

      intel_miptree_prepare_render(brw, irb->mt, irb->mt_level,
                                   irb->mt_layer, irb->layer_count,
                                   aux_usage);

      brw_cache_flush_for_render(brw, irb->mt->bo,
                                 isl_format, aux_usage);
   }
}

/**
 * \brief Call this after drawing to mark which buffers need resolving
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 *
 * Also mark any render targets which will be textured as needing a render
 * cache flush.
 */
static void
brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   struct intel_renderbuffer *front_irb = NULL;
   struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];

   if (_mesa_is_front_buffer_drawing(fb))
      front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);

   if (front_irb)
      front_irb->need_downsample = true;
   if (back_irb)
      back_irb->need_downsample = true;
   if (depth_irb) {
      bool depth_written = brw_depth_writes_enabled(brw);
      if (depth_att->Layered) {
         intel_miptree_finish_depth(brw, depth_irb->mt,
                                    depth_irb->mt_level,
                                    depth_irb->mt_layer,
                                    depth_irb->layer_count,
                                    depth_written);
      } else {
         intel_miptree_finish_depth(brw, depth_irb->mt,
                                    depth_irb->mt_level,
                                    depth_irb->mt_layer, 1,
                                    depth_written);
      }
      if (depth_written)
         brw_depth_cache_add_bo(brw, depth_irb->mt->bo);
   }

   if (stencil_irb && brw->stencil_write_enabled)
      brw_depth_cache_add_bo(brw, stencil_irb->mt->bo);

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (!irb)
         continue;

      mesa_format mesa_format =
         _mesa_get_render_format(ctx, intel_rb_format(irb));
      enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
      enum isl_aux_usage aux_usage = brw->draw_aux_usage[i];

      brw_render_cache_add_bo(brw, irb->mt->bo, isl_format, aux_usage);

      intel_miptree_finish_render(brw, irb->mt, irb->mt_level,
                                  irb->mt_layer, irb->layer_count,
                                  aux_usage);
   }
}

static void
intel_renderbuffer_move_temp_back(struct brw_context *brw,
                                  struct intel_renderbuffer *irb)
{
   if (irb->align_wa_mt == NULL)
      return;

   brw_cache_flush_for_read(brw, irb->align_wa_mt->bo);

   intel_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0,
                            irb->mt,
                            irb->Base.Base.TexImage->Level, irb->mt_layer);

   intel_miptree_reference(&irb->align_wa_mt, NULL);

   /* Finally restore the x,y to correspond to full miptree. */
   intel_renderbuffer_set_draw_offset(irb);

   /* Make sure render surface state gets re-emitted with updated miptree. */
   brw->NewGLState |= _NEW_BUFFERS;
}

static void
brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   struct intel_renderbuffer *depth_irb =
      intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb =
      intel_get_renderbuffer(fb, BUFFER_STENCIL);

   if (depth_irb && depth_irb->align_wa_mt)
      intel_renderbuffer_move_temp_back(brw, depth_irb);

   if (stencil_irb && stencil_irb->align_wa_mt)
      intel_renderbuffer_move_temp_back(brw, stencil_irb);

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (!irb || irb->align_wa_mt == NULL)
         continue;

      intel_renderbuffer_move_temp_back(brw, irb);
   }
}

static void
brw_prepare_drawing(struct gl_context *ctx,
                    const struct gl_vertex_array *arrays[],
                    const struct _mesa_index_buffer *ib,
                    bool index_bounds_valid,
                    GLuint min_index,
                    GLuint max_index)
{
   struct brw_context *brw = brw_context(ctx);

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      util_last_bit(ctx->FragmentProgram._Current->SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      util_last_bit(ctx->GeometryProgram._Current->SamplersUsed) : 0;
   brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ?
      util_last_bit(ctx->TessEvalProgram._Current->SamplersUsed) : 0;
   brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ?
      util_last_bit(ctx->TessCtrlProgram._Current->SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      util_last_bit(ctx->VertexProgram._Current->SamplersUsed);
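
   /* Worked example of why util_last_bit() and not a popcount: an ARB
    * fragment program sampling only from texture unit 4 has
    * SamplersUsed == 0x10, so util_last_bit() gives the 5 sampler slots
    * we must program, whereas a bit-count would claim just 1 and leave
    * slot 4 unmapped.  (Illustrative values only.)
    */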

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Resolves must occur after updating renderbuffers, updating context state,
    * and finalizing textures but before setting up any hardware state for
    * this draw call.
    */
   bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS] = { };
   brw_predraw_resolve_inputs(brw, true, draw_aux_buffer_disabled);
   brw_predraw_resolve_framebuffer(brw, draw_aux_buffer_disabled);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.index_bounds_valid = index_bounds_valid;
   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
}

static void
brw_finish_drawing(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_program_cache_check_size(brw);
   brw_postdraw_reconcile_align_wa_slices(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   if (brw->draw.draw_params_count_bo) {
      brw_bo_unreference(brw->draw.draw_params_count_bo);
      brw->draw.draw_params_count_bo = NULL;
   }
}

/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_draw_single_prim(struct gl_context *ctx,
                     const struct gl_vertex_array *arrays[],
                     const struct _mesa_prim *prim,
                     unsigned prim_id,
                     struct brw_transform_feedback_object *xfb_obj,
                     unsigned stream,
                     struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   bool fail_next = false;

   /* Flag BRW_NEW_DRAW_CALL on every draw.  This allows us to have
    * atoms that happen on every draw call.
    */
   brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL;

   /* Flush the batch if the batch/state buffers are nearly full.  We can
    * grow them if needed, but this is not free, so we'd like to avoid it.
    */
   intel_batchbuffer_require_space(brw, 1500, RENDER_RING);
   brw_require_statebuffer_space(brw, 2400);
   intel_batchbuffer_save_state(brw);

   if (brw->num_instances != prim->num_instances ||
       brw->basevertex != prim->basevertex ||
       brw->baseinstance != prim->base_instance) {
      brw->num_instances = prim->num_instances;
      brw->basevertex = prim->basevertex;
      brw->baseinstance = prim->base_instance;
      if (prim_id > 0) { /* For i == 0 we just did this before the loop */
         brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }
   }

   /* Determine if we need to flag BRW_NEW_VERTICES for updating the
    * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we
    * always flag if the shader uses one of the values. For direct draws,
    * we only flag if the values change.
    */
   const int new_basevertex =
      prim->indexed ? prim->basevertex : prim->start;
   const int new_baseinstance = prim->base_instance;
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   if (prim_id > 0) {
      const bool uses_draw_parameters =
         vs_prog_data->uses_basevertex ||
         vs_prog_data->uses_baseinstance;

      if ((uses_draw_parameters && prim->is_indirect) ||
          (vs_prog_data->uses_basevertex &&
           brw->draw.params.gl_basevertex != new_basevertex) ||
          (vs_prog_data->uses_baseinstance &&
           brw->draw.params.gl_baseinstance != new_baseinstance))
         brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
   }

   brw->draw.params.gl_basevertex = new_basevertex;
   brw->draw.params.gl_baseinstance = new_baseinstance;
   brw_bo_unreference(brw->draw.draw_params_bo);

   if (prim->is_indirect) {
      /* Point draw_params_bo at the indirect buffer. */
      brw->draw.draw_params_bo =
         intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
      brw_bo_reference(brw->draw.draw_params_bo);
      brw->draw.draw_params_offset =
         prim->indirect_offset + (prim->indexed ? 12 : 8);
   } else {
      /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
       * has to upload gl_BaseVertex and such if they're needed.
       */
      brw->draw.draw_params_bo = NULL;
      brw->draw.draw_params_offset = 0;
   }

   /* gl_DrawID always needs its own vertex buffer since it's not part of
    * the indirect parameter buffer. If the program uses gl_DrawID we need
    * to flag BRW_NEW_VERTICES. For the first iteration, we don't have
    * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
    * the loop.
    */
   brw->draw.gl_drawid = prim->draw_id;
   brw_bo_unreference(brw->draw.draw_id_bo);
   brw->draw.draw_id_bo = NULL;
   if (prim_id > 0 && vs_prog_data->uses_drawid)
      brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   if (devinfo->gen < 6)
      brw_set_prim(brw, prim);
   else
      gen6_set_prim(brw, prim);

retry:

   /* Note that before the loop, brw->ctx.NewDriverState was set to != 0,
    * and that the only state updated within the loop outside this block is
    * that touched by *_set_prim() or intel_batchbuffer_flush(), both of
    * which only affect brw->ctx.NewDriverState.
    */
   if (brw->ctx.NewDriverState) {
      brw->batch.no_wrap = true;
      brw_upload_render_state(brw);
   }

   brw_emit_prim(brw, prim, brw->primitive, xfb_obj, stream);

   brw->batch.no_wrap = false;

   if (!brw_batch_has_aperture_space(brw, 0)) {
      if (!fail_next) {
         intel_batchbuffer_reset_to_saved(brw);
         intel_batchbuffer_flush(brw);
         fail_next = true;
         goto retry;
      } else {
         int ret = intel_batchbuffer_flush(brw);
         WARN_ONCE(ret == -ENOSPC,
                   "i965: Single primitive emit exceeded "
                   "available aperture space\n");
      }
   }

   /* Now that we know we haven't run out of aperture space, we can safely
    * reset the dirty bits.
    */
   if (brw->ctx.NewDriverState)
      brw_render_state_finished(brw);
}

void
brw_draw_prims(struct gl_context *ctx,
               const struct _mesa_prim *prims,
               GLuint nr_prims,
               const struct _mesa_index_buffer *ib,
               GLboolean index_bounds_valid,
               GLuint min_index,
               GLuint max_index,
               struct gl_transform_feedback_object *gl_xfb_obj,
               unsigned stream,
               struct gl_buffer_object *indirect)
{
   unsigned i;
   struct brw_context *brw = brw_context(ctx);
   const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
   int predicate_state = brw->predicate.state;
   struct brw_transform_feedback_object *xfb_obj =
      (struct brw_transform_feedback_object *) gl_xfb_obj;

   if (!brw_check_conditional_render(brw))
      return;

   /* Handle primitive restart if needed */
   if (brw_handle_primitive_restart(ctx, prims, nr_prims, ib, indirect)) {
      /* The draw was handled, so we can exit now */
      return;
   }

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
    */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_enum_to_string(ctx->RenderMode));
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, prims, nr_prims, ib,
                      index_bounds_valid, min_index, max_index, NULL, 0, NULL);
      return;
   }

   /* If we're going to have to upload any of the user's vertex arrays, then
    * get the minimum and maximum of their index buffer so we know what range
    * to upload.
    */
   if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
      perf_debug("Scanning index buffer to compute index buffer bounds.  "
                 "Use glDrawRangeElements() to avoid this.\n");
      vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
      index_bounds_valid = true;
   }
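
   /* What the perf_debug above asks applications for (an illustrative
    * client-side sketch): instead of
    *
    *    glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_SHORT, offset);
    *
    * a caller that already knows its index range can use
    *
    *    glDrawRangeElements(GL_TRIANGLES, min_index, max_index, count,
    *                        GL_UNSIGNED_SHORT, offset);
    *
    * which hands us valid bounds directly and skips this scan.
    */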

   brw_prepare_drawing(ctx, arrays, ib, index_bounds_valid, min_index,
                       max_index);

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   for (i = 0; i < nr_prims; i++) {
      /* Implementation of ARB_indirect_parameters via predicates */
      if (brw->draw.draw_params_count_bo) {
         brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);

         /* Upload the current draw count from the draw parameters buffer to
          * MI_PREDICATE_SRC0.
          */
         brw_load_register_mem(brw, MI_PREDICATE_SRC0,
                               brw->draw.draw_params_count_bo,
                               brw->draw.draw_params_count_offset);
         /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
         brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0);
         /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
         brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id);

         BEGIN_BATCH(1);
         if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) {
            OUT_BATCH(GEN7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
                      MI_PREDICATE_COMBINEOP_SET |
                      MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
         } else {
            OUT_BATCH(GEN7_MI_PREDICATE |
                      MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR |
                      MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
         }
         ADVANCE_BATCH();

         brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
      }
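
      /* A rough sketch of how the predicate sequence above implements
       * ARB_indirect_parameters: SRC0 holds the application-supplied draw
       * count, SRC1 the current draw id.  With a draw count of 2 and four
       * submitted draws:
       *
       *    i == 0:  LOADINV|SET  ->  pred = !(2 == 0)    = 1  (drawn)
       *    i == 1:  LOAD|XOR     ->  pred = 1 ^ (2 == 1) = 1  (drawn)
       *    i == 2:  LOAD|XOR     ->  pred = 1 ^ (2 == 2) = 0  (skipped)
       *    i == 3:  LOAD|XOR     ->  pred = 0 ^ (2 == 3) = 0  (skipped)
       *
       * The predicate flips off exactly when the draw id reaches the
       * count and stays off for the rest of the loop.
       */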

      brw_draw_single_prim(ctx, arrays, &prims[i], i, xfb_obj, stream,
                           indirect);
   }

   brw_finish_drawing(ctx);
   brw->predicate.state = predicate_state;
}

void
brw_draw_indirect_prims(struct gl_context *ctx,
                        GLuint mode,
                        struct gl_buffer_object *indirect_data,
                        GLsizeiptr indirect_offset,
                        unsigned draw_count,
                        unsigned stride,
                        struct gl_buffer_object *indirect_params,
                        GLsizeiptr indirect_params_offset,
                        const struct _mesa_index_buffer *ib)
{
   struct brw_context *brw = brw_context(ctx);
   struct _mesa_prim *prim;
   GLsizei i;

   prim = calloc(draw_count, sizeof(*prim));
   if (prim == NULL) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s",
                  (draw_count > 1) ? "Multi" : "",
                  ib ? "Elements" : "Arrays",
                  indirect_params ? "CountARB" : "");
      return;
   }

   prim[0].begin = 1;
   prim[draw_count - 1].end = 1;
   for (i = 0; i < draw_count; ++i, indirect_offset += stride) {
      prim[i].mode = mode;
      prim[i].indexed = ib != NULL;
      prim[i].indirect_offset = indirect_offset;
      prim[i].is_indirect = 1;
      prim[i].draw_id = i;
   }

   if (indirect_params) {
      brw->draw.draw_params_count_bo =
         intel_buffer_object(indirect_params)->buffer;
      brw_bo_reference(brw->draw.draw_params_count_bo);
      brw->draw.draw_params_count_offset = indirect_params_offset;
   }

   brw_draw_prims(ctx, prim, draw_count,
                  ib, false, 0, ~0,
                  NULL, 0,
                  indirect_data);

   free(prim);
}
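
/* From the API side, this entry point backs calls such as (an
 * illustrative sketch; 'cmd_buf' and the literal arguments are
 * hypothetical):
 *
 *    glBindBuffer(GL_DRAW_INDIRECT_BUFFER, cmd_buf);
 *    glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT,
 *                                (const void *) 0,   // indirect_offset
 *                                16,                 // draw_count
 *                                0);                 // stride: tightly packed
 *
 * ARB_indirect_parameters adds the glMultiDrawElementsIndirectCountARB
 * variant, which is what supplies indirect_params above.
 */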

void
brw_draw_init(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct vbo_context *vbo = vbo_context(ctx);

   /* Register our drawing function:
    */
   vbo->draw_prims = brw_draw_prims;
   vbo->draw_indirect_prims = brw_draw_indirect_prims;

   for (int i = 0; i < VERT_ATTRIB_MAX; i++)
      brw->vb.inputs[i].buffer = -1;
   brw->vb.nr_buffers = 0;
   brw->vb.nr_enabled = 0;
}

void
brw_draw_destroy(struct brw_context *brw)
{
   unsigned i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      brw_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < brw->vb.nr_enabled; i++) {
      brw->vb.enabled[i]->buffer = -1;
   }
   brw->vb.nr_enabled = 0;

   brw_bo_unreference(brw->ib.bo);
   brw->ib.bo = NULL;
}