Home | History | Annotate | Download | only in i965
      1 /*
      2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
      3  Intel funded Tungsten Graphics to
      4  develop this 3D driver.
      5 
      6  Permission is hereby granted, free of charge, to any person obtaining
      7  a copy of this software and associated documentation files (the
      8  "Software"), to deal in the Software without restriction, including
      9  without limitation the rights to use, copy, modify, merge, publish,
     10  distribute, sublicense, and/or sell copies of the Software, and to
     11  permit persons to whom the Software is furnished to do so, subject to
     12  the following conditions:
     13 
     14  The above copyright notice and this permission notice (including the
     15  next paragraph) shall be included in all copies or substantial
     16  portions of the Software.
     17 
     18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25 
     26  **********************************************************************/
     27  /*
     28   * Authors:
     29   *   Keith Whitwell <keithw (at) vmware.com>
     30   */
     31 
     32 
     33 
     34 #include "brw_context.h"
     35 #include "brw_defines.h"
     36 #include "brw_state.h"
     37 #include "brw_program.h"
     38 #include "drivers/common/meta.h"
     39 #include "intel_batchbuffer.h"
     40 #include "intel_buffers.h"
     41 #include "brw_vs.h"
     42 #include "brw_ff_gs.h"
     43 #include "brw_gs.h"
     44 #include "brw_wm.h"
     45 #include "brw_cs.h"
     46 #include "main/framebuffer.h"
     47 
     48 static void
     49 brw_upload_initial_gpu_state(struct brw_context *brw)
     50 {
     51    const struct gen_device_info *devinfo = &brw->screen->devinfo;
     52 
     53    /* On platforms with hardware contexts, we can set our initial GPU state
     54     * right away rather than doing it via state atoms.  This saves a small
     55     * amount of overhead on every draw call.
     56     */
     57    if (!brw->hw_ctx)
     58       return;
     59 
     60    if (devinfo->gen == 6)
     61       brw_emit_post_sync_nonzero_flush(brw);
     62 
     63    brw_upload_invariant_state(brw);
     64 
     65    if (devinfo->gen == 10) {
     66       brw_load_register_imm32(brw, GEN10_CACHE_MODE_SS,
     67                               REG_MASK(GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
     68                               GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE);
     69 
     70       /* From gen10 workaround table in h/w specs:
     71        *
     72        *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
     73        *     a value of 0xFFFF"
     74        *
     75        * This means that we end up setting the entire 3D_MODE state. Bits
     76        * in this register control things such as slice hashing and we want
     77        * the default values of zero at the moment.
     78        */
     79       BEGIN_BATCH(2);
     80       OUT_BATCH(_3DSTATE_3D_MODE  << 16 | (2 - 2));
     81       OUT_BATCH(0xFFFF << 16);
     82       ADVANCE_BATCH();
     83    }
     84 
     85    if (devinfo->gen == 9) {
     86       /* Recommended optimizations for Victim Cache eviction and floating
     87        * point blending.
     88        */
     89       brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
     90                               REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
     91                               REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
     92                               GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
     93                               GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
     94 
     95       if (gen_device_info_is_9lp(devinfo)) {
     96          brw_load_register_imm32(brw, GEN7_GT_MODE,
     97                                  GEN9_SUBSLICE_HASHING_MASK_BITS |
     98                                  GEN9_SUBSLICE_HASHING_16x16);
     99       }
    100    }
    101 
    102    if (devinfo->gen >= 8) {
    103       gen8_emit_3dstate_sample_pattern(brw);
    104 
    105       BEGIN_BATCH(5);
    106       OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
    107       OUT_BATCH(0);
    108       OUT_BATCH(0);
    109       OUT_BATCH(0);
    110       OUT_BATCH(0);
    111       ADVANCE_BATCH();
    112 
    113       BEGIN_BATCH(2);
    114       OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
    115       OUT_BATCH(0);
    116       ADVANCE_BATCH();
    117    }
    118 }
    119 
    120 static inline const struct brw_tracked_state *
    121 brw_get_pipeline_atoms(struct brw_context *brw,
    122                        enum brw_pipeline pipeline)
    123 {
    124    switch (pipeline) {
    125    case BRW_RENDER_PIPELINE:
    126       return brw->render_atoms;
    127    case BRW_COMPUTE_PIPELINE:
    128       return brw->compute_atoms;
    129    default:
    130       STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
    131       unreachable("Unsupported pipeline");
    132       return NULL;
    133    }
    134 }
    135 
    136 void
    137 brw_copy_pipeline_atoms(struct brw_context *brw,
    138                         enum brw_pipeline pipeline,
    139                         const struct brw_tracked_state **atoms,
    140                         int num_atoms)
    141 {
    142    /* This is to work around brw_context::atoms being declared const.  We want
    143     * it to be const, but it needs to be initialized somehow!
    144     */
    145    struct brw_tracked_state *context_atoms =
    146       (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
    147 
    148    for (int i = 0; i < num_atoms; i++) {
    149       context_atoms[i] = *atoms[i];
    150       assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
    151       assert(context_atoms[i].emit);
    152    }
    153 
    154    brw->num_atoms[pipeline] = num_atoms;
    155 }
    156 
    157 void brw_init_state( struct brw_context *brw )
    158 {
    159    struct gl_context *ctx = &brw->ctx;
    160    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    161 
    162    /* Force the first brw_select_pipeline to emit pipeline select */
    163    brw->last_pipeline = BRW_NUM_PIPELINES;
    164 
    165    brw_init_caches(brw);
    166 
    167    if (devinfo->gen >= 10)
    168       gen10_init_atoms(brw);
    169    else if (devinfo->gen >= 9)
    170       gen9_init_atoms(brw);
    171    else if (devinfo->gen >= 8)
    172       gen8_init_atoms(brw);
    173    else if (devinfo->is_haswell)
    174       gen75_init_atoms(brw);
    175    else if (devinfo->gen >= 7)
    176       gen7_init_atoms(brw);
    177    else if (devinfo->gen >= 6)
    178       gen6_init_atoms(brw);
    179    else if (devinfo->gen >= 5)
    180       gen5_init_atoms(brw);
    181    else if (devinfo->is_g4x)
    182       gen45_init_atoms(brw);
    183    else
    184       gen4_init_atoms(brw);
    185 
    186    brw_upload_initial_gpu_state(brw);
    187 
    188    brw->NewGLState = ~0;
    189    brw->ctx.NewDriverState = ~0ull;
    190 
    191    /* ~0 is a nonsensical value which won't match anything we program, so
    192     * the programming will take effect on the first time around.
    193     */
    194    brw->pma_stall_bits = ~0;
    195 
    196    /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
    197     * dirty flags.
    198     */
    199    STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
    200 
    201    ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
    202    ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
    203    ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
    204    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
    205    ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
    206    ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
    207    ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
    208    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
    209    ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
    210    ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
    211 }
    212 
    213 
    214 void brw_destroy_state( struct brw_context *brw )
    215 {
    216    brw_destroy_caches(brw);
    217 }
    218 
    219 /***********************************************************************
    220  */
    221 
    222 static bool
    223 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
    224 {
    225    return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
    226 }
    227 
    228 static void accumulate_state( struct brw_state_flags *a,
    229 			      const struct brw_state_flags *b )
    230 {
    231    a->mesa |= b->mesa;
    232    a->brw |= b->brw;
    233 }
    234 
    235 
    236 static void xor_states( struct brw_state_flags *result,
    237 			     const struct brw_state_flags *a,
    238 			      const struct brw_state_flags *b )
    239 {
    240    result->mesa = a->mesa ^ b->mesa;
    241    result->brw = a->brw ^ b->brw;
    242 }
    243 
    244 struct dirty_bit_map {
    245    uint64_t bit;
    246    char *name;
    247    uint32_t count;
    248 };
    249 
    250 #define DEFINE_BIT(name) {name, #name, 0}
    251 
/* Statistics table for the core Mesa _NEW_* dirty flags.  Each entry pairs a
 * flag with its name and a hit counter; brw_upload_pipeline_state() feeds it
 * state.mesa when INTEL_DEBUG & DEBUG_STATE is set.  The all-zero entry at
 * the end terminates the table.
 */
static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems: the cast to unsigned happens before the
    * value is widened to the 64-bit 'bit' field.
    */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};
    286 
/* Statistics table for the driver's BRW_NEW_* dirty flags, laid out like
 * mesa_bits and terminated by an all-zero entry.
 *
 * brw_upload_pipeline_state() statically asserts that this array has exactly
 * BRW_NUM_STATE_BITS + 1 elements, so an entry must be added here whenever a
 * new BRW_NEW_* flag is introduced.
 */
static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_AUX_STATE),
   {0, 0, 0}
};
    348 
    349 static void
    350 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
    351 {
    352    for (int i = 0; bit_map[i].bit != 0; i++) {
    353       if (bit_map[i].bit & bits)
    354 	 bit_map[i].count++;
    355    }
    356 }
    357 
    358 static void
    359 brw_print_dirty_count(struct dirty_bit_map *bit_map)
    360 {
    361    for (int i = 0; bit_map[i].bit != 0; i++) {
    362       if (bit_map[i].count > 1) {
    363          fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
    364                  bit_map[i].bit, bit_map[i].count, bit_map[i].name);
    365       }
    366    }
    367 }
    368 
    369 static inline void
    370 brw_upload_tess_programs(struct brw_context *brw)
    371 {
    372    if (brw->programs[MESA_SHADER_TESS_EVAL]) {
    373       brw_upload_tcs_prog(brw);
    374       brw_upload_tes_prog(brw);
    375    } else {
    376       brw->tcs.base.prog_data = NULL;
    377       brw->tes.base.prog_data = NULL;
    378    }
    379 }
    380 
    381 static inline void
    382 brw_upload_programs(struct brw_context *brw,
    383                     enum brw_pipeline pipeline)
    384 {
    385    struct gl_context *ctx = &brw->ctx;
    386    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    387 
    388    if (pipeline == BRW_RENDER_PIPELINE) {
    389       brw_upload_vs_prog(brw);
    390       brw_upload_tess_programs(brw);
    391 
    392       if (brw->programs[MESA_SHADER_GEOMETRY]) {
    393          brw_upload_gs_prog(brw);
    394       } else {
    395          brw->gs.base.prog_data = NULL;
    396          if (devinfo->gen < 7)
    397             brw_upload_ff_gs_prog(brw);
    398       }
    399 
    400       /* Update the VUE map for data exiting the GS stage of the pipeline.
    401        * This comes from the last enabled shader stage.
    402        */
    403       GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
    404       bool old_separate = brw->vue_map_geom_out.separate;
    405       struct brw_vue_prog_data *vue_prog_data;
    406       if (brw->programs[MESA_SHADER_GEOMETRY])
    407          vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
    408       else if (brw->programs[MESA_SHADER_TESS_EVAL])
    409          vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
    410       else
    411          vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
    412 
    413       brw->vue_map_geom_out = vue_prog_data->vue_map;
    414 
    415       /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
    416       if (old_slots != brw->vue_map_geom_out.slots_valid ||
    417           old_separate != brw->vue_map_geom_out.separate)
    418          brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
    419 
    420       if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
    421           VARYING_BIT_VIEWPORT) {
    422          ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
    423          brw->clip.viewport_count =
    424             (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
    425             ctx->Const.MaxViewports : 1;
    426       }
    427 
    428       brw_upload_wm_prog(brw);
    429 
    430       if (devinfo->gen < 6) {
    431          brw_upload_clip_prog(brw);
    432          brw_upload_sf_prog(brw);
    433       }
    434 
    435       brw_disk_cache_write_render_programs(brw);
    436    } else if (pipeline == BRW_COMPUTE_PIPELINE) {
    437       brw_upload_cs_prog(brw);
    438       brw_disk_cache_write_compute_program(brw);
    439    }
    440 }
    441 
    442 static inline void
    443 merge_ctx_state(struct brw_context *brw,
    444                 struct brw_state_flags *state)
    445 {
    446    state->mesa |= brw->NewGLState;
    447    state->brw |= brw->ctx.NewDriverState;
    448 }
    449 
    450 static ALWAYS_INLINE void
    451 check_and_emit_atom(struct brw_context *brw,
    452                     struct brw_state_flags *state,
    453                     const struct brw_tracked_state *atom)
    454 {
    455    if (check_state(state, &atom->dirty)) {
    456       atom->emit(brw);
    457       merge_ctx_state(brw, state);
    458    }
    459 }
    460 
/**
 * Emit every state atom of a pipeline whose dirty flags are set.
 *
 * The working set of dirty flags starts from the flags saved for this
 * pipeline in brw->state.pipelines[] and is merged with the context's
 * NewGLState / NewDriverState.  Bound programs, the meta-in-progress flag
 * and the framebuffer sample count are compared against the cached values
 * first so that the matching BRW_NEW_* flags get raised.
 *
 * \param brw       the i965 context
 * \param pipeline  pipeline (render or compute) to upload state for
 */
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   int i;
   /* Number of DEBUG_STATE uploads so far; shared by all callers. */
   static int dirty_count = 0;
   /* Local copy of the flags saved for this pipeline. */
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   /* Sample count of the draw buffer, never less than 1. */
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   /* Refresh the cached render-stage programs and flag the ones that
    * changed.
    */
   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->programs[MESA_SHADER_FRAGMENT] !=
          ctx->FragmentProgram._Current) {
         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
          ctx->TessEvalProgram._Current) {
         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
          ctx->TessCtrlProgram._Current) {
         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_GEOMETRY] !=
          ctx->GeometryProgram._Current) {
         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   /* The compute program is tracked regardless of the selected pipeline. */
   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   /* Uploading programs can raise further driver flags (for example
    * BRW_NEW_VUE_MAP_GEOM_OUT), so merge the context flags again afterwards.
    */
   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
	 const struct brw_tracked_state *atom = &atoms[i];
	 struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

	 accumulate_state(&examined, &atom->dirty);

	 /* generated = (prev ^ state)
	  * if (examined & generated)
	  *     fail;
	  *
	  * i.e. an atom must not raise a flag that this atom or an
	  * earlier one has already examined.
	  */
	 xor_states(&generated, &prev, &state);
	 assert(!check_state(&examined, &generated));
	 prev = state;
      }
   }
   else {
      /* Regular path: emit every atom whose dirty flags match. */
      for (i = 0; i < num_atoms; i++) {
	 const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      /* brw_bits needs one entry per state bit plus the terminator. */
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      /* Dump the counters on the first and then every 1000th upload. */
      if (dirty_count++ % 1000 == 0) {
	 brw_print_dirty_count(mesa_bits);
	 brw_print_dirty_count(brw_bits);
	 fprintf(stderr, "\n");
      }
   }
}
    591 
    592 /***********************************************************************
    593  * Emit all state:
    594  */
    595 void brw_upload_render_state(struct brw_context *brw)
    596 {
    597    brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
    598 }
    599 
    600 static inline void
    601 brw_pipeline_state_finished(struct brw_context *brw,
    602                             enum brw_pipeline pipeline)
    603 {
    604    /* Save all dirty state into the other pipelines */
    605    for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
    606       if (i != pipeline) {
    607          brw->state.pipelines[i].mesa |= brw->NewGLState;
    608          brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
    609       } else {
    610          memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
    611       }
    612    }
    613 
    614    brw->NewGLState = 0;
    615    brw->ctx.NewDriverState = 0ull;
    616 }
    617 
    618 /**
    619  * Clear dirty bits to account for the fact that the state emitted by
    620  * brw_upload_render_state() has been committed to the hardware. This is a
    621  * separate call from brw_upload_render_state() because it's possible that
    622  * after the call to brw_upload_render_state(), we will discover that we've
    623  * run out of aperture space, and need to rewind the batch buffer to the state
    624  * it had before the brw_upload_render_state() call.
    625  */
    626 void
    627 brw_render_state_finished(struct brw_context *brw)
    628 {
    629    brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
    630 }
    631 
    632 void
    633 brw_upload_compute_state(struct brw_context *brw)
    634 {
    635    brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
    636 }
    637 
    638 void
    639 brw_compute_state_finished(struct brw_context *brw)
    640 {
    641    brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
    642 }
    643