Home | History | Annotate | Download | only in etnaviv
      1 /*
      2  * Copyright (c) 2014-2015 Etnaviv Project
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the
     12  * next paragraph) shall be included in all copies or substantial portions
     13  * of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *    Wladimir J. van der Laan <laanwj (at) gmail.com>
     25  */
     26 
     27 #include "etnaviv_emit.h"
     28 
     29 #include "etnaviv_blend.h"
     30 #include "etnaviv_compiler.h"
     31 #include "etnaviv_context.h"
     32 #include "etnaviv_rasterizer.h"
     33 #include "etnaviv_resource.h"
     34 #include "etnaviv_rs.h"
     35 #include "etnaviv_screen.h"
     36 #include "etnaviv_shader.h"
     37 #include "etnaviv_texture.h"
     38 #include "etnaviv_translate.h"
     39 #include "etnaviv_uniforms.h"
     40 #include "etnaviv_util.h"
     41 #include "etnaviv_zsa.h"
     42 #include "hw/common.xml.h"
     43 #include "hw/state.xml.h"
     44 #include "hw/state_blt.xml.h"
     45 #include "util/u_math.h"
     46 
     47 /* Queue a STALL command (queues 2 words) */
     48 static inline void
     49 CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
     50 {
     51    etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);
     52    etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
     53 }
     54 
     55 void
     56 etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
     57 {
     58    bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT);
     59    etna_cmd_stream_reserve(stream, blt ? 8 : 4);
     60 
     61    if (blt) {
     62       etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
     63       etna_cmd_stream_emit(stream, 1);
     64    }
     65 
     66    /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
     67    etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
     68    etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));
     69 
     70    if (from == SYNC_RECIPIENT_FE) {
     71       /* if the frontend is to be stalled, queue a STALL frontend command */
     72       CMD_STALL(stream, from, to);
     73    } else {
     74       /* otherwise, load the STALL token state */
     75       etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
     76       etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
     77    }
     78 
     79    if (blt) {
     80       etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
     81       etna_cmd_stream_emit(stream, 0);
     82    }
     83 }
     84 
     85 #define EMIT_STATE(state_name, src_value) \
     86    etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)
     87 
     88 #define EMIT_STATE_FIXP(state_name, src_value) \
     89    etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)
     90 
     91 #define EMIT_STATE_RELOC(state_name, src_value) \
     92    etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)
     93 
     94 #define ETNA_3D_CONTEXT_SIZE  (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */
     95 
     96 static unsigned
     97 required_stream_size(struct etna_context *ctx)
     98 {
     99    unsigned size = ETNA_3D_CONTEXT_SIZE;
    100 
    101    /* stall + flush */
    102    size += 2 + 4;
    103 
    104    /* vertex elements */
    105    size += ctx->vertex_elements->num_elements + 1;
    106 
    107    /* uniforms - worst case (2 words per uniform load) */
    108    size += ctx->shader.vs->uniforms.const_count * 2;
    109    size += ctx->shader.fs->uniforms.const_count * 2;
    110 
    111    /* shader */
    112    size += ctx->shader_state.vs_inst_mem_size + 1;
    113    size += ctx->shader_state.ps_inst_mem_size + 1;
    114 
    115    /* DRAW_INDEXED_PRIMITIVES command */
    116    size += 6;
    117 
    118    /* reserve for alignment etc. */
    119    size += 64;
    120 
    121    return size;
    122 }
    123 
    124 /* Emit state that only exists on HALTI5+ */
    125 static void
    126 emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
    127 {
    128    struct etna_cmd_stream *stream = ctx->stream;
    129    uint32_t dirty = ctx->dirty;
    130    struct etna_coalesce coalesce;
    131 
    132    etna_coalesce_start(stream, &coalesce);
    133    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    134       /* Magic states (load balancing, inter-unit sync, buffers) */
    135       /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8));
    136       /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20));
    137       for (int x = 0; x < 4; ++x) {
    138          /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
    139       }
    140    }
    141    if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
    142       for (int x = 0; x < 4; ++x) {
    143          /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);
    144       }
    145    }
    146    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    147       /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
    148       /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
    149       /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
    150       /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);
    151    }
    152    etna_coalesce_end(stream, &coalesce);
    153 }
    154 
    155 /* Emit state that no longer exists on HALTI5 */
    156 static void
    157 emit_pre_halti5_state(struct etna_context *ctx)
    158 {
    159    struct etna_cmd_stream *stream = ctx->stream;
    160    uint32_t dirty = ctx->dirty;
    161    struct etna_coalesce coalesce;
    162 
    163    etna_coalesce_start(stream, &coalesce);
    164    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    165       /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
    166    }
    167    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    168       for (int x = 0; x < 4; ++x) {
    169         /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
    170       }
    171    }
    172    if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
    173       for (int x = 0; x < 4; ++x) {
    174         /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
    175       }
    176    }
    177    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    178       /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
    179    }
    180    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    181       for (int x = 0; x < 10; ++x) {
    182          /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
    183       }
    184    }
    185    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
    186       /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
    187       for (int x = 0; x < 4; ++x) {
    188          /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
    189       }
    190       for (int x = 0; x < 16; ++x) {
    191          /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
    192       }
    193    }
    194    if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
    195       /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
    196    }
    197    if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
    198       /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
    199    }
    200    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    201       /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
    202       for (int x = 0; x < 2; ++x) {
    203          /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
    204       }
    205    }
    206    etna_coalesce_end(stream, &coalesce);
    207 }
    208 
    209 /* Weave state before draw operation. This function merges all the compiled
    210  * state blocks under the context into one device register state. Parts of
    211  * this state that are changed since last call (dirty) will be uploaded as
    212  * state changes in the command buffer. */
    213 void
    214 etna_emit_state(struct etna_context *ctx)
    215 {
    216    struct etna_cmd_stream *stream = ctx->stream;
    217 
    218    /* Pre-reserve the command buffer space which we are likely to need.
    219     * This must cover all the state emitted below, and the following
    220     * draw command. */
    221    etna_cmd_stream_reserve(stream, required_stream_size(ctx));
    222 
    223    uint32_t dirty = ctx->dirty;
    224 
    225    /* Pre-processing: see what caches we need to flush before making state changes. */
    226    uint32_t to_flush = 0;
    227    if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
    228       /* Need flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. */
    229 #if 0
    230         /* TODO*/
    231         if ((ctx->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) !=
    232            (etna_blend_state(ctx->blend)->PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE))
    233 #endif
    234       to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
    235    }
    236    if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))
    237       to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
    238    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
    239       to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
    240    if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))
    241       to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
    242 
    243    if (to_flush) {
    244       etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);
    245       etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
    246    }
    247 
    248    /* Flush TS cache before changing TS configuration. */
    249    if (unlikely(dirty & ETNA_DIRTY_TS)) {
    250       etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
    251    }
    252 
    253    /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader
    254     * state to make sure it is always rewritten. */
    255    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
    256       if ((ctx->gpu3d.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK) !=
    257           (ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK)) {
    258          /* XXX what does the GPU set these states to on MSAA samples change?
    259           * Does it do the right thing?
    260           * (increase/decrease as necessary) or something else? Just set some
    261           * invalid value until we know for
    262           * sure. */
    263          ctx->gpu3d.PS_INPUT_COUNT = 0xffffffff;
    264          ctx->gpu3d.PS_TEMP_REGISTER_CONTROL = 0xffffffff;
    265       }
    266    }
    267 
    268    /* Update vertex elements. This is different from any of the other states, in that
    269     * a) the number of vertex elements written matters: so write only active ones
    270     * b) the vertex element states must all be written: do not skip entries that stay the same */
    271    if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
    272       if (ctx->specs.halti >= 5) {
    273          /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
    274             ctx->vertex_elements->num_elements,
    275             ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
    276          /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
    277             ctx->vertex_elements->num_elements,
    278             ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
    279          /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
    280             ctx->vertex_elements->num_elements,
    281             ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
    282       } else {
    283          /* Special case: vertex elements must always be sent in full if changed */
    284          /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
    285             ctx->vertex_elements->num_elements,
    286             ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
    287          if (ctx->specs.halti >= 2) {
    288             /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
    289                ctx->vertex_elements->num_elements,
    290                ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
    291          }
    292       }
    293    }
    294    unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
    295                            ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
    296                            : ctx->shader_state.VS_OUTPUT_COUNT;
    297 
    298    /* The following code is originally generated by gen_merge_state.py, to
    299     * emit state in increasing order of address (this makes it possible to merge
    300     * consecutive register updates into one SET_STATE command)
    301     *
    302     * There have been some manual changes, where the weaving operation is not
    303     * simply bitwise or:
    304     * - scissor fixp
    305     * - num vertex elements
    306     * - scissor handling
    307     * - num samplers
    308     * - texture lod
    309     * - ETNA_DIRTY_TS
    310     * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
    311     * change anyway
    312     * - PS / framebuffer interaction for MSAA
    313     * - move update of GL_MULTI_SAMPLE_CONFIG first
    314     * - add unlikely()/likely()
    315     */
    316    struct etna_coalesce coalesce;
    317 
    318    etna_coalesce_start(stream, &coalesce);
    319 
    320    /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
    321     * directly
    322     *    or indirectly */
    323    /* multi sample config is set first, and outside of the normal sorting
    324     * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
    325     * possibly PS.TEMP_REGISTER_CONTROL).
    326     */
    327    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {
    328       uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);
    329       val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;
    330 
    331       /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);
    332    }
    333    if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
    334       /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
    335       /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
    336    }
    337    if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
    338       /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
    339    }
    340    if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
    341       if (ctx->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
    342          for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
    343             /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
    344          }
    345          for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
    346             if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
    347                /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
    348             }
    349          }
    350          for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
    351             if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
    352                /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_UNK14680(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_UNK14680);
    353             }
    354          }
    355       } else if(ctx->specs.stream_count >= 1) { /* hw w/ multiple vertex streams */
    356          for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
    357             /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
    358          }
    359          for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
    360             if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
    361                /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
    362             }
    363          }
    364       } else { /* hw w/ single vertex stream */
    365          /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
    366          /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
    367       }
    368    }
    369    if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
    370 
    371       /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
    372    }
    373    if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
    374       /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
    375       /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
    376    }
    377    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    378       /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
    379    }
    380    if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
    381       /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
    382       /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);
    383       /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);
    384       /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);
    385       /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);
    386       /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);
    387    }
    388    if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
    389       struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
    390 
    391       /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);
    392       /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);
    393       /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);
    394    }
    395    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    396       /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
    397    }
    398    if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {
    399       uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;
    400       /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);
    401    }
    402    if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
    403       struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
    404       /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
    405       /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
    406    }
    407    if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
    408                          ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
    409       /* this is a bit of a mess: rasterizer.scissor determines whether to use
    410        * only the framebuffer scissor, or specific scissor state, and the
    411        * viewport clips too so the logic spans four CSOs */
    412       struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
    413 
    414       uint32_t scissor_left =
    415          MAX2(ctx->framebuffer.SE_SCISSOR_LEFT, ctx->viewport.SE_SCISSOR_LEFT);
    416       uint32_t scissor_top =
    417          MAX2(ctx->framebuffer.SE_SCISSOR_TOP, ctx->viewport.SE_SCISSOR_TOP);
    418       uint32_t scissor_right =
    419          MIN2(ctx->framebuffer.SE_SCISSOR_RIGHT, ctx->viewport.SE_SCISSOR_RIGHT);
    420       uint32_t scissor_bottom =
    421          MIN2(ctx->framebuffer.SE_SCISSOR_BOTTOM, ctx->viewport.SE_SCISSOR_BOTTOM);
    422 
    423       if (rasterizer->scissor) {
    424          scissor_left = MAX2(ctx->scissor.SE_SCISSOR_LEFT, scissor_left);
    425          scissor_top = MAX2(ctx->scissor.SE_SCISSOR_TOP, scissor_top);
    426          scissor_right = MIN2(ctx->scissor.SE_SCISSOR_RIGHT, scissor_right);
    427          scissor_bottom = MIN2(ctx->scissor.SE_SCISSOR_BOTTOM, scissor_bottom);
    428       }
    429 
    430       /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, scissor_left);
    431       /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, scissor_top);
    432       /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, scissor_right);
    433       /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, scissor_bottom);
    434    }
    435    if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
    436       struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
    437 
    438       /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);
    439       /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
    440       /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
    441    }
    442    if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
    443                          ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
    444       struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
    445 
    446       uint32_t clip_right =
    447          MIN2(ctx->framebuffer.SE_CLIP_RIGHT, ctx->viewport.SE_CLIP_RIGHT);
    448       uint32_t clip_bottom =
    449          MIN2(ctx->framebuffer.SE_CLIP_BOTTOM, ctx->viewport.SE_CLIP_BOTTOM);
    450 
    451       if (rasterizer->scissor) {
    452          clip_right = MIN2(ctx->scissor.SE_CLIP_RIGHT, clip_right);
    453          clip_bottom = MIN2(ctx->scissor.SE_CLIP_BOTTOM, clip_bottom);
    454       }
    455 
    456       /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, clip_right);
    457       /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, clip_bottom);
    458    }
    459    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    460       /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
    461    }
    462    if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
    463       /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
    464       /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
    465                            ctx->framebuffer.msaa_mode
    466                               ? ctx->shader_state.PS_INPUT_COUNT_MSAA
    467                               : ctx->shader_state.PS_INPUT_COUNT);
    468       /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,
    469                            ctx->framebuffer.msaa_mode
    470                               ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
    471                               : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
    472       /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
    473    }
    474    if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
    475       uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
    476       /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, val | ctx->framebuffer.PE_DEPTH_CONFIG);
    477    }
    478    if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
    479       /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
    480       /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);
    481    }
    482    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
    483       /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);
    484 
    485       if (ctx->specs.pixel_pipes == 1) {
    486          /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
    487       }
    488 
    489       /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
    490    }
    491    if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
    492       uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP;
    493       /*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
    494    }
    495    if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF))) {
    496       uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG;
    497       /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG);
    498    }
    499    if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
    500       uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
    501       /*01420*/ EMIT_STATE(PE_ALPHA_OP, val);
    502    }
    503    if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
    504       /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);
    505    }
    506    if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
    507       uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;
    508       /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);
    509    }
    510    if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
    511       uint32_t val;
    512       /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
    513        * as a mask to enable the bits from blend PE_COLOR_FORMAT */
    514       val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
    515               VIVS_PE_COLOR_FORMAT_OVERWRITE);
    516       val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;
    517       val &= ctx->framebuffer.PE_COLOR_FORMAT;
    518       /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
    519    }
    520    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
    521       if (ctx->specs.pixel_pipes == 1) {
    522          /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
    523          /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
    524          /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
    525       } else if (ctx->specs.pixel_pipes == 2) {
    526          /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
    527          /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
    528          /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
    529          /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);
    530          /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
    531          /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
    532       } else {
    533          abort();
    534       }
    535    }
    536    if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF))) {
    537       /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, ctx->stencil_ref.PE_STENCIL_CONFIG_EXT);
    538    }
    539    if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
    540       struct etna_blend_state *blend = etna_blend_state(ctx->blend);
    541       /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP);
    542    }
    543    if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
    544       struct etna_blend_state *blend = etna_blend_state(ctx->blend);
    545       for (int x = 0; x < 2; ++x) {
    546          /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
    547       }
    548    }
    549    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
    550       /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
    551       /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
    552       /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);
    553       /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);
    554       /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
    555       /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
    556       /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
    557    }
    558    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
    559       /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
    560    }
    561    etna_coalesce_end(stream, &coalesce);
    562    /* end only EMIT_STATE */
    563 
    564    /* Emit strongly architecture-specific state */
    565    if (ctx->specs.halti >= 5)
    566       emit_halti5_only_state(ctx, vs_output_count);
    567    else
    568       emit_pre_halti5_state(ctx);
    569 
    570    ctx->emit_texture_state(ctx);
    571 
    572    /* Insert a FE/PE stall as changing the shader instructions (and maybe
    573     * the uniforms) can corrupt the previous in-progress draw operation.
    574     * Observed with amoeba on GC2000 during the right-to-left rendering
    575     * of PI, and can cause GPU hangs immediately after.
    576     * I summise that this is because the "new" locations at 0xc000 are not
    577     * properly protected against updates as other states seem to be. Hence,
    578     * we detect the "new" vertex shader instruction offset to apply this. */
    579    if (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF) && ctx->specs.vs_offset > 0x4000)
    580       etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
    581 
    582    /* We need to update the uniform cache only if one of the following bits are
    583     * set in ctx->dirty:
    584     * - ETNA_DIRTY_SHADER
    585     * - ETNA_DIRTY_CONSTBUF
    586     * - uniforms_dirty_bits
    587     *
    588     * In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In
    589     * all
    590     * other cases we can load on the changed uniforms.
    591     */
    592    static const uint32_t uniform_dirty_bits =
    593       ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;
    594 
    595    if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
    596       etna_uniforms_write(
    597          ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX],
    598          ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size);
    599 
    600    if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
    601       etna_uniforms_write(
    602          ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT],
    603          ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);
    604 
    605    /**** Large dynamically-sized state ****/
    606    bool do_uniform_flush = ctx->specs.halti < 5;
    607    if (dirty & (ETNA_DIRTY_SHADER)) {
    608       /* Special case: a new shader was loaded; simply re-load all uniforms and
    609        * shader code at once */
    610       /* This sequence is special, do not change ordering unless necessary. According to comment
    611          snippets in the Vivante kernel driver a process called "steering" goes on while programming
    612          shader state. This (as I understand it) means certain unified states are "steered"
    613          toward a specific shader unit (VS/PS/...) based on either explicit flags in register
    614          00860, or what other state is written before "auto-steering". So this means some
    615          state can legitimately be programmed multiple times.
    616        */
    617 
    618       if (ctx->specs.halti >= 5) { /* ICACHE (HALTI5) */
    619          assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);
    620          /* Set icache (VS) */
    621          etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
    622          etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);
    623          assert(ctx->shader_state.VS_INST_ADDR.bo);
    624          etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
    625          etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
    626          etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
    627          etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);
    628 
    629          /* Set icache (PS) */
    630          etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
    631          etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);
    632          assert(ctx->shader_state.PS_INST_ADDR.bo);
    633          etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
    634          etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
    635          etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
    636          etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);
    637 
    638       } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
    639          /* ICACHE (pre-HALTI5) */
    640          assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
    641          /* Set icache (VS) */
    642          etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
    643          etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
    644                VIVS_VS_ICACHE_CONTROL_ENABLE |
    645                VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
    646          assert(ctx->shader_state.VS_INST_ADDR.bo);
    647          etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
    648 
    649          /* Set icache (PS) */
    650          etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
    651          etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
    652                VIVS_VS_ICACHE_CONTROL_ENABLE |
    653                VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
    654          assert(ctx->shader_state.PS_INST_ADDR.bo);
    655          etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
    656       } else {
    657          /* Upload shader directly, first flushing and disabling icache if
    658           * supported on this hw */
    659          if (ctx->specs.has_icache) {
    660             etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
    661                   VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
    662                   VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
    663          }
    664          if (ctx->specs.has_shader_range_registers) {
    665             etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
    666             etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
    667                                         0x100);
    668          }
    669          etna_set_state_multi(stream, ctx->specs.vs_offset,
    670                               ctx->shader_state.vs_inst_mem_size,
    671                               ctx->shader_state.VS_INST_MEM);
    672          etna_set_state_multi(stream, ctx->specs.ps_offset,
    673                               ctx->shader_state.ps_inst_mem_size,
    674                               ctx->shader_state.PS_INST_MEM);
    675       }
    676 
    677       if (ctx->specs.has_unified_uniforms) {
    678          etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
    679          etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms);
    680       }
    681 
    682       if (do_uniform_flush)
    683          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
    684       etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset,
    685                                      ctx->shader_state.vs_uniforms_size,
    686                                      ctx->shader_state.VS_UNIFORMS);
    687       if (do_uniform_flush)
    688          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
    689       etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset,
    690                                      ctx->shader_state.ps_uniforms_size,
    691                                      ctx->shader_state.PS_UNIFORMS);
    692 
    693       /* Copy uniforms to gpu3d, so that incremental updates to uniforms are
    694        * possible as long as the
    695        * same shader remains bound */
    696       ctx->gpu3d.vs_uniforms_size = ctx->shader_state.vs_uniforms_size;
    697       ctx->gpu3d.ps_uniforms_size = ctx->shader_state.ps_uniforms_size;
    698       memcpy(ctx->gpu3d.VS_UNIFORMS, ctx->shader_state.VS_UNIFORMS,
    699              ctx->shader_state.vs_uniforms_size * 4);
    700       memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
    701              ctx->shader_state.ps_uniforms_size * 4);
    702 
    703       if (ctx->specs.halti >= 5) {
    704          /* HALTI5 needs to be prompted to pre-fetch shaders */
    705          etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
    706          etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
    707          etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
    708       }
    709    } else {
    710       /* ideally this cache would only be flushed if there are VS uniform changes */
    711       if (do_uniform_flush)
    712          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
    713       etna_coalesce_start(stream, &coalesce);
    714       for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) {
    715          if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
    716             etna_coalsence_emit(stream, &coalesce, ctx->specs.vs_uniforms_offset + x*4, ctx->shader_state.VS_UNIFORMS[x]);
    717             ctx->gpu3d.VS_UNIFORMS[x] = ctx->shader_state.VS_UNIFORMS[x];
    718          }
    719       }
    720       etna_coalesce_end(stream, &coalesce);
    721 
    722       /* ideally this cache would only be flushed if there are PS uniform changes */
    723       if (do_uniform_flush)
    724          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
    725       etna_coalesce_start(stream, &coalesce);
    726       for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) {
    727          if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
    728             etna_coalsence_emit(stream, &coalesce, ctx->specs.ps_uniforms_offset + x*4, ctx->shader_state.PS_UNIFORMS[x]);
    729             ctx->gpu3d.PS_UNIFORMS[x] = ctx->shader_state.PS_UNIFORMS[x];
    730          }
    731       }
    732       etna_coalesce_end(stream, &coalesce);
    733    }
    734 /**** End of state update ****/
    735 #undef EMIT_STATE
    736 #undef EMIT_STATE_FIXP
    737 #undef EMIT_STATE_RELOC
    738    ctx->dirty = 0;
    739    ctx->dirty_sampler_views = 0;
    740 }
    741