Home | History | Annotate | Download | only in i965
      1 /*
      2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
      3  Intel funded Tungsten Graphics to
      4  develop this 3D driver.
      5 
      6  Permission is hereby granted, free of charge, to any person obtaining
      7  a copy of this software and associated documentation files (the
      8  "Software"), to deal in the Software without restriction, including
      9  without limitation the rights to use, copy, modify, merge, publish,
     10  distribute, sublicense, and/or sell copies of the Software, and to
     11  permit persons to whom the Software is furnished to do so, subject to
     12  the following conditions:
     13 
     14  The above copyright notice and this permission notice (including the
     15  next paragraph) shall be included in all copies or substantial
     16  portions of the Software.
     17 
     18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25 
     26  **********************************************************************/
     27  /*
     28   * Authors:
     29   *   Keith Whitwell <keithw (at) vmware.com>
     30   */
     31 
     32 
     33 
     34 #include "brw_context.h"
     35 #include "brw_state.h"
     36 #include "drivers/common/meta.h"
     37 #include "intel_batchbuffer.h"
     38 #include "intel_buffers.h"
     39 #include "brw_vs.h"
     40 #include "brw_ff_gs.h"
     41 #include "brw_gs.h"
     42 #include "brw_wm.h"
     43 #include "brw_cs.h"
     44 #include "main/framebuffer.h"
     45 
/**
 * Render-pipeline state atoms for hardware older than Gen6.
 *
 * brw_init_state() copies this list into the context when brw->gen is
 * neither 6, 7 nor >= 8.  brw_upload_pipeline_state() walks the copied list
 * from first to last entry, so the position of each atom in this array is
 * significant; the inline comments record the known ordering constraints.
 */
static const struct brw_tracked_state *gen4_atoms[] =
{
   /* Once all the programs are done, we know how large urb entry
    * sizes need to be and can decide if we need to change the urb
    * layout.
    */
   &brw_curbe_offsets,
   &brw_recalculate_urb_fence,

   &brw_cc_vp,
   &brw_cc_unit,

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_wm_pull_constants,
   &brw_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,

   /* These set up state for brw_psp_urb_cbs */
   &brw_wm_unit,
   &brw_sf_vp,
   &brw_sf_unit,
   &brw_vs_unit,		/* always required, enabled or not */
   &brw_clip_unit,
   &brw_gs_unit,

   /* Command packets:
    */
   &brw_invariant_state,

   &brw_binding_table_pointers,
   &brw_blend_constant_color,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_psp_urb_cbs,

   &brw_drawing_rect,
   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &brw_constant_buffer
};
    103 
/**
 * Render-pipeline state atoms for Gen6.
 *
 * brw_init_state() copies this list into the context when brw->gen == 6;
 * no compute list is installed for that generation.  Atoms are processed in
 * array order by brw_upload_pipeline_state(), so the ordering notes on the
 * individual entries must be respected when editing the list.
 */
static const struct brw_tracked_state *gen6_atoms[] =
{
   &gen6_sf_and_clip_viewports,

   /* Command packets: */

   &brw_cc_vp,
   &gen6_viewport_state,	/* must do after *_vp stages */

   &gen6_urb,
   &gen6_blend_state,		/* must do before cc unit */
   &gen6_color_calc_state,	/* must do before cc unit */
   &gen6_depth_stencil_state,	/* must do before cc unit */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_state */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &gen6_sol_surface,
   &brw_vs_binding_table,
   &gen6_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_sampler_state,
   &gen6_multisample_state,

   &gen6_vs_state,
   &gen6_gs_state,
   &gen6_clip_state,
   &gen6_sf_state,
   &gen6_wm_state,

   &gen6_scissor_state,

   &gen6_binding_table_pointers,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,
};
    168 
/**
 * Render-pipeline state atoms for Gen7.
 *
 * brw_init_state() copies this list into the context when brw->gen == 7,
 * alongside gen7_compute_atoms for the compute pipeline.  Atoms are
 * processed in array order by brw_upload_pipeline_state(); keep the
 * "before"/"after" notes on the entries intact when reordering.
 */
static const struct brw_tracked_state *gen7_render_atoms[] =
{
   /* Command packets: */

   &brw_cc_vp,
   &gen7_sf_clip_viewport,

   &gen7_l3_state,
   &gen7_push_constant_space,
   &gen7_urb,
   &gen6_blend_state,		/* must do before cc unit */
   &gen6_color_calc_state,	/* must do before cc unit */
   &gen6_depth_stencil_state,	/* must do before cc unit */

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
   &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen7_tcs_push_constants,
   &gen7_tes_push_constants,
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_tcs_pull_constants,
   &brw_tcs_ubo_surfaces,
   &brw_tcs_abo_surfaces,
   &brw_tes_pull_constants,
   &brw_tes_ubo_surfaces,
   &brw_tes_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_tcs_binding_table,
   &brw_tes_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_tcs_samplers,
   &brw_tes_samplers,
   &brw_gs_samplers,
   &gen6_multisample_state,

   &gen7_vs_state,
   &gen7_hs_state,
   &gen7_te_state,
   &gen7_ds_state,
   &gen7_gs_state,
   &gen7_sol_state,
   &gen6_clip_state,
   &gen7_sbe_state,
   &gen7_sf_state,
   &gen7_wm_state,
   &gen7_ps_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &haswell_cut_index,
};
    260 
/**
 * Compute-pipeline state atoms for Gen7.
 *
 * brw_init_state() copies this list into the context's compute atom array
 * when brw->gen == 7.  Atoms are processed in array order.
 */
static const struct brw_tracked_state *gen7_compute_atoms[] =
{
   &gen7_l3_state,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_pull_constants,
   &brw_cs_ubo_surfaces,
   &brw_cs_abo_surfaces,
   &brw_cs_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_samplers,
   &brw_cs_state,
};
    274 
/**
 * Render-pipeline state atoms for Gen8 and later.
 *
 * brw_init_state() copies this list into the context when brw->gen >= 8,
 * alongside gen8_compute_atoms for the compute pipeline.  Atoms are
 * processed in array order by brw_upload_pipeline_state(); keep the
 * "before" notes on the entries intact when reordering.
 */
static const struct brw_tracked_state *gen8_render_atoms[] =
{
   &brw_cc_vp,
   &gen8_sf_clip_viewport,

   &gen7_l3_state,
   &gen7_push_constant_space,
   &gen7_urb,
   &gen8_blend_state,
   &gen6_color_calc_state,

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
   &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen7_tcs_push_constants,
   &gen7_tes_push_constants,
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_tcs_pull_constants,
   &brw_tcs_ubo_surfaces,
   &brw_tcs_abo_surfaces,
   &brw_tes_pull_constants,
   &brw_tes_ubo_surfaces,
   &brw_tes_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_tcs_binding_table,
   &brw_tes_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_tcs_samplers,
   &brw_tes_samplers,
   &brw_gs_samplers,
   &gen8_multisample_state,

   &gen8_vs_state,
   &gen8_hs_state,
   &gen7_te_state,
   &gen8_ds_state,
   &gen8_gs_state,
   &gen7_sol_state,
   &gen6_clip_state,
   &gen8_raster_state,
   &gen8_sbe_state,
   &gen8_sf_state,
   &gen8_ps_blend,
   &gen8_ps_extra,
   &gen8_ps_state,
   &gen8_wm_depth_stencil,
   &gen8_wm_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_drawing_rect,

   &gen8_vf_topology,

   &brw_indices,
   &gen8_index_buffer,
   &gen8_vertices,

   &haswell_cut_index,
   &gen8_pma_fix,
};
    370 
/**
 * Compute-pipeline state atoms for Gen8 and later.
 *
 * brw_init_state() copies this list into the context's compute atom array
 * when brw->gen >= 8.  Atoms are processed in array order.  The entries are
 * currently the same as in gen7_compute_atoms.
 */
static const struct brw_tracked_state *gen8_compute_atoms[] =
{
   &gen7_l3_state,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_pull_constants,
   &brw_cs_ubo_surfaces,
   &brw_cs_abo_surfaces,
   &brw_cs_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_samplers,
   &brw_cs_state,
};
    384 
/**
 * Emit the initial GPU state directly into the batch instead of going
 * through state atoms.
 *
 * Returns without emitting anything when brw->hw_ctx is not set.  Called
 * from brw_init_state() once the atom lists have been installed.
 *
 * \param brw  driver context to emit into
 */
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   /* Gen6 only: flush before the invariant state is uploaded. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   /* Recommended optimization for Victim Cache eviction in pixel backend. */
   if (brw->gen >= 9) {
      /* Single-register MI_LOAD_REGISTER_IMM targeting GEN7_CACHE_MODE_1;
       * the payload carries the bit both in the mask half (REG_MASK) and as
       * the value being set.
       */
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(GEN7_CACHE_MODE_1);
      OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
      ADVANCE_BATCH();
   }

   if (brw->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

      /* 3DSTATE_WM_HZ_OP with every payload dword zeroed. */
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      /* 3DSTATE_WM_CHROMAKEY with its payload dword zeroed. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
    427 
    428 static inline const struct brw_tracked_state *
    429 brw_get_pipeline_atoms(struct brw_context *brw,
    430                        enum brw_pipeline pipeline)
    431 {
    432    switch (pipeline) {
    433    case BRW_RENDER_PIPELINE:
    434       return brw->render_atoms;
    435    case BRW_COMPUTE_PIPELINE:
    436       return brw->compute_atoms;
    437    default:
    438       STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
    439       unreachable("Unsupported pipeline");
    440       return NULL;
    441    }
    442 }
    443 
    444 static void
    445 brw_copy_pipeline_atoms(struct brw_context *brw,
    446                         enum brw_pipeline pipeline,
    447                         const struct brw_tracked_state **atoms,
    448                         int num_atoms)
    449 {
    450    /* This is to work around brw_context::atoms being declared const.  We want
    451     * it to be const, but it needs to be initialized somehow!
    452     */
    453    struct brw_tracked_state *context_atoms =
    454       (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
    455 
    456    for (int i = 0; i < num_atoms; i++) {
    457       context_atoms[i] = *atoms[i];
    458       assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
    459       assert(context_atoms[i].emit);
    460    }
    461 
    462    brw->num_atoms[pipeline] = num_atoms;
    463 }
    464 
    465 void brw_init_state( struct brw_context *brw )
    466 {
    467    struct gl_context *ctx = &brw->ctx;
    468 
    469    /* Force the first brw_select_pipeline to emit pipeline select */
    470    brw->last_pipeline = BRW_NUM_PIPELINES;
    471 
    472    STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
    473    STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
    474    STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
    475                  ARRAY_SIZE(brw->render_atoms));
    476    STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
    477                  ARRAY_SIZE(brw->render_atoms));
    478    STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
    479                  ARRAY_SIZE(brw->compute_atoms));
    480    STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
    481                  ARRAY_SIZE(brw->compute_atoms));
    482 
    483    brw_init_caches(brw);
    484 
    485    if (brw->gen >= 8) {
    486       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
    487                               gen8_render_atoms,
    488                               ARRAY_SIZE(gen8_render_atoms));
    489       brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
    490                               gen8_compute_atoms,
    491                               ARRAY_SIZE(gen8_compute_atoms));
    492    } else if (brw->gen == 7) {
    493       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
    494                               gen7_render_atoms,
    495                               ARRAY_SIZE(gen7_render_atoms));
    496       brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
    497                               gen7_compute_atoms,
    498                               ARRAY_SIZE(gen7_compute_atoms));
    499    } else if (brw->gen == 6) {
    500       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
    501                               gen6_atoms, ARRAY_SIZE(gen6_atoms));
    502    } else {
    503       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
    504                               gen4_atoms, ARRAY_SIZE(gen4_atoms));
    505    }
    506 
    507    brw_upload_initial_gpu_state(brw);
    508 
    509    brw->NewGLState = ~0;
    510    brw->ctx.NewDriverState = ~0ull;
    511 
    512    /* ~0 is a nonsensical value which won't match anything we program, so
    513     * the programming will take effect on the first time around.
    514     */
    515    brw->pma_stall_bits = ~0;
    516 
    517    /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
    518     * dirty flags.
    519     */
    520    STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
    521 
    522    ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
    523    ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
    524    ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
    525    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
    526    ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
    527    ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
    528    ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
    529    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
    530    ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
    531    ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
    532 }
    533 
    534 
/**
 * Counterpart of brw_init_state(): releases the state caches by calling
 * brw_destroy_caches().
 *
 * \param brw  driver context being torn down
 */
void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}
    539 
    540 /***********************************************************************
    541  */
    542 
    543 static bool
    544 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
    545 {
    546    return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
    547 }
    548 
    549 static void accumulate_state( struct brw_state_flags *a,
    550 			      const struct brw_state_flags *b )
    551 {
    552    a->mesa |= b->mesa;
    553    a->brw |= b->brw;
    554 }
    555 
    556 
    557 static void xor_states( struct brw_state_flags *result,
    558 			     const struct brw_state_flags *a,
    559 			      const struct brw_state_flags *b )
    560 {
    561    result->mesa = a->mesa ^ b->mesa;
    562    result->brw = a->brw ^ b->brw;
    563 }
    564 
    565 struct dirty_bit_map {
    566    uint64_t bit;
    567    char *name;
    568    uint32_t count;
    569 };
    570 
    571 #define DEFINE_BIT(name) {name, #name, 0}
    572 
/**
 * Statistics table for the core Mesa _NEW_* dirty flags.
 *
 * brw_upload_pipeline_state() feeds the accumulated state.mesa word into
 * this table when DEBUG_STATE is set in INTEL_DEBUG.  The table is
 * terminated by an all-zero row and is not const because the counters are
 * updated in place.
 */
static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_BUFFER_OBJECT),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems: the cast to unsigned happens before the
    * value is widened to the 64-bit bit field, so DEFINE_BIT is not used
    * for this row.
    */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};
    607 
/**
 * Statistics table for the driver's BRW_NEW_* dirty flags.
 *
 * brw_upload_pipeline_state() feeds the accumulated state.brw word into
 * this table when DEBUG_STATE is set in INTEL_DEBUG.  A static assertion
 * there requires ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1, i.e. one
 * row per state bit plus the all-zero terminator, so adding a new
 * BRW_NEW_* flag requires adding a row here.
 */
static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   {0, 0, 0}
};
    669 
    670 static void
    671 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
    672 {
    673    for (int i = 0; bit_map[i].bit != 0; i++) {
    674       if (bit_map[i].bit & bits)
    675 	 bit_map[i].count++;
    676    }
    677 }
    678 
    679 static void
    680 brw_print_dirty_count(struct dirty_bit_map *bit_map)
    681 {
    682    for (int i = 0; bit_map[i].bit != 0; i++) {
    683       if (bit_map[i].count > 1) {
    684          fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
    685                  bit_map[i].bit, bit_map[i].count, bit_map[i].name);
    686       }
    687    }
    688 }
    689 
    690 static inline void
    691 brw_upload_tess_programs(struct brw_context *brw)
    692 {
    693    if (brw->tess_eval_program) {
    694       brw_upload_tcs_prog(brw);
    695       brw_upload_tes_prog(brw);
    696    } else {
    697       brw->tcs.base.prog_data = NULL;
    698       brw->tes.base.prog_data = NULL;
    699    }
    700 }
    701 
    702 static inline void
    703 brw_upload_programs(struct brw_context *brw,
    704                     enum brw_pipeline pipeline)
    705 {
    706    struct gl_context *ctx = &brw->ctx;
    707 
    708    if (pipeline == BRW_RENDER_PIPELINE) {
    709       brw_upload_vs_prog(brw);
    710       brw_upload_tess_programs(brw);
    711 
    712       if (brw->gen < 6)
    713          brw_upload_ff_gs_prog(brw);
    714       else
    715          brw_upload_gs_prog(brw);
    716 
    717       /* Update the VUE map for data exiting the GS stage of the pipeline.
    718        * This comes from the last enabled shader stage.
    719        */
    720       GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
    721       bool old_separate = brw->vue_map_geom_out.separate;
    722       struct brw_vue_prog_data *vue_prog_data;
    723       if (brw->geometry_program)
    724          vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
    725       else if (brw->tess_eval_program)
    726          vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
    727       else
    728          vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
    729 
    730       brw->vue_map_geom_out = vue_prog_data->vue_map;
    731 
    732       /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
    733       if (old_slots != brw->vue_map_geom_out.slots_valid ||
    734           old_separate != brw->vue_map_geom_out.separate)
    735          brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
    736 
    737       if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
    738           VARYING_BIT_VIEWPORT) {
    739          ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
    740          brw->clip.viewport_count =
    741             (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
    742             ctx->Const.MaxViewports : 1;
    743       }
    744 
    745       brw_upload_wm_prog(brw);
    746 
    747       if (brw->gen < 6) {
    748          brw_upload_clip_prog(brw);
    749          brw_upload_sf_prog(brw);
    750       }
    751    } else if (pipeline == BRW_COMPUTE_PIPELINE) {
    752       brw_upload_cs_prog(brw);
    753    }
    754 }
    755 
    756 static inline void
    757 merge_ctx_state(struct brw_context *brw,
    758                 struct brw_state_flags *state)
    759 {
    760    state->mesa |= brw->NewGLState;
    761    state->brw |= brw->ctx.NewDriverState;
    762 }
    763 
    764 static inline void
    765 check_and_emit_atom(struct brw_context *brw,
    766                     struct brw_state_flags *state,
    767                     const struct brw_tracked_state *atom)
    768 {
    769    if (check_state(state, &atom->dirty)) {
    770       atom->emit(brw);
    771       merge_ctx_state(brw, state);
    772    }
    773 }
    774 
/**
 * Upload all dirty state for one pipeline.
 *
 * The function selects the pipeline, refreshes the driver's cached program
 * pointers and sample count (raising the matching BRW_NEW_* flags when they
 * changed), and returns early when nothing is dirty.  Otherwise it uploads
 * the programs and the state base address and then walks the pipeline's
 * atom list in order, emitting every atom whose dirty mask intersects the
 * accumulated flags.
 *
 * The dirty flags themselves are not cleared here; see
 * brw_pipeline_state_finished().
 *
 * \param brw       driver context
 * \param pipeline  pipeline to upload state for
 */
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   int i;
   /* Number of uploads seen so far; only used to pace the DEBUG_STATE
    * statistics dump at the end of the function.
    */
   static int dirty_count = 0;
   /* Start from the flags saved for this pipeline while another pipeline
    * was active.
    */
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);

   brw_select_pipeline(brw, pipeline);

   if (0) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   /* Graphics program bindings are only tracked for the render pipeline. */
   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->fragment_program != ctx->FragmentProgram._Current) {
         brw->fragment_program = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
         brw->tess_eval_program = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
         brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->geometry_program != ctx->GeometryProgram._Current) {
         brw->geometry_program = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->vertex_program != ctx->VertexProgram._Current) {
         brw->vertex_program = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   /* The compute program, meta state and sample count are checked for
    * either pipeline.
    */
   if (brw->compute_program != ctx->ComputeProgram._Current) {
      brw->compute_program = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   /* Uploading programs may raise further dirty flags, so merge again. */
   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
	 const struct brw_tracked_state *atom = &atoms[i];
	 struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

	 accumulate_state(&examined, &atom->dirty);

	 /* generated = (prev ^ state)
	  * if (examined & generated)
	  *     fail;
	  *
	  * i.e. an atom must not raise a flag that this atom or an earlier
	  * one in the list already tested.
	  */
	 xor_states(&generated, &prev, &state);
	 assert(!check_state(&examined, &generated));
	 prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
	 const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      /* Every BRW_NEW_* bit needs a row in brw_bits, plus the terminator. */
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      /* Dump the statistics on the first upload and every 1000th after. */
      if (dirty_count++ % 1000 == 0) {
	 brw_print_dirty_count(mesa_bits);
	 brw_print_dirty_count(brw_bits);
	 fprintf(stderr, "\n");
      }
   }
}
    899 
/***********************************************************************
 * Emit all state:
 */

/**
 * Upload all dirty state for the render pipeline.
 *
 * Thin wrapper around brw_upload_pipeline_state() with
 * BRW_RENDER_PIPELINE.  Dirty bits are cleared separately by
 * brw_render_state_finished().
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}
    907 
    908 static inline void
    909 brw_pipeline_state_finished(struct brw_context *brw,
    910                             enum brw_pipeline pipeline)
    911 {
    912    /* Save all dirty state into the other pipelines */
    913    for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
    914       if (i != pipeline) {
    915          brw->state.pipelines[i].mesa |= brw->NewGLState;
    916          brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
    917       } else {
    918          memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
    919       }
    920    }
    921 
    922    brw->NewGLState = 0;
    923    brw->ctx.NewDriverState = 0ull;
    924 }
    925 
/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware. This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the state
 * it had before the brw_upload_render_state() call.
 *
 * Implemented by calling brw_pipeline_state_finished() with
 * BRW_RENDER_PIPELINE, which also saves the pending dirty bits for the
 * other pipelines before clearing them.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}
    939 
/**
 * Upload all dirty state for the compute pipeline.
 *
 * Thin wrapper around brw_upload_pipeline_state() with
 * BRW_COMPUTE_PIPELINE.  Dirty bits are cleared separately by
 * brw_compute_state_finished().
 */
void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}
    945 
/**
 * Compute-pipeline counterpart of brw_render_state_finished(): calls
 * brw_pipeline_state_finished() with BRW_COMPUTE_PIPELINE, which saves the
 * pending dirty bits for the other pipelines and then clears them.
 */
void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}
    951