Home | History | Annotate | Download | only in nv50
      1 /*
      2  * Copyright 2008 Ben Skeggs
      3  * Copyright 2010 Christoph Bumiller
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be included in
     13  * all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     19  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     20  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  */
     23 
     24 #include "pipe/p_context.h"
     25 #include "pipe/p_defines.h"
     26 #include "pipe/p_state.h"
     27 #include "util/u_inlines.h"
     28 
     29 #include "nv50_context.h"
     30 
     31 void
     32 nv50_constbufs_validate(struct nv50_context *nv50)
     33 {
     34    struct nouveau_pushbuf *push = nv50->base.pushbuf;
     35    unsigned s;
     36 
     37    for (s = 0; s < 3; ++s) {
     38       unsigned p;
     39 
     40       if (s == PIPE_SHADER_FRAGMENT)
     41          p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
     42       else
     43       if (s == PIPE_SHADER_GEOMETRY)
     44          p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
     45       else
     46          p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;
     47 
     48       while (nv50->constbuf_dirty[s]) {
     49          const int i = ffs(nv50->constbuf_dirty[s]) - 1;
     50          nv50->constbuf_dirty[s] &= ~(1 << i);
     51 
     52          if (nv50->constbuf[s][i].user) {
     53             const unsigned b = NV50_CB_PVP + s;
     54             unsigned start = 0;
     55             unsigned words = nv50->constbuf[s][0].size / 4;
     56             if (i) {
     57                NOUVEAU_ERR("user constbufs only supported in slot 0\n");
     58                continue;
     59             }
     60             if (!nv50->state.uniform_buffer_bound[s]) {
     61                nv50->state.uniform_buffer_bound[s] = TRUE;
     62                BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
     63                PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
     64             }
     65             while (words) {
     66                unsigned nr;
     67 
     68                if (!PUSH_SPACE(push, 16))
     69                   break;
     70                nr = PUSH_AVAIL(push);
     71                assert(nr >= 16);
     72                nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
     73 
     74                BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
     75                PUSH_DATA (push, (start << 8) | b);
     76                BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
     77                PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);
     78 
     79                start += nr;
     80                words -= nr;
     81             }
     82          } else {
     83             struct nv04_resource *res =
     84                nv04_resource(nv50->constbuf[s][i].u.buf);
     85             if (res) {
     86                /* TODO: allocate persistent bindings */
     87                const unsigned b = s * 16 + i;
     88 
     89                assert(nouveau_resource_mapped_by_gpu(&res->base));
     90 
     91                BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
     92                PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
     93                PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
     94                PUSH_DATA (push, (b << 16) |
     95                           (nv50->constbuf[s][i].size & 0xffff));
     96                BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
     97                PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
     98 
     99                BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
    100             } else {
    101                BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
    102                PUSH_DATA (push, (i << 8) | p | 0);
    103             }
    104             if (i == 0)
    105                nv50->state.uniform_buffer_bound[s] = FALSE;
    106          }
    107       }
    108    }
    109 }
    110 
    111 static boolean
    112 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
    113 {
    114    if (!prog->translated) {
    115       prog->translated = nv50_program_translate(
    116          prog, nv50->screen->base.device->chipset);
    117       if (!prog->translated)
    118          return FALSE;
    119    } else
    120    if (prog->mem)
    121       return TRUE;
    122 
    123    return nv50_program_upload_code(nv50, prog);
    124 }
    125 
    126 static INLINE void
    127 nv50_program_update_context_state(struct nv50_context *nv50,
    128                                   struct nv50_program *prog, int stage)
    129 {
    130    const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
    131 
    132    if (prog && prog->tls_space) {
    133       if (nv50->state.new_tls_space)
    134          nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
    135       if (!nv50->state.tls_required || nv50->state.new_tls_space)
    136          BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
    137       nv50->state.new_tls_space = FALSE;
    138       nv50->state.tls_required |= 1 << stage;
    139    } else {
    140       if (nv50->state.tls_required == (1 << stage))
    141          nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
    142       nv50->state.tls_required &= ~(1 << stage);
    143    }
    144 }
    145 
    146 void
    147 nv50_vertprog_validate(struct nv50_context *nv50)
    148 {
    149    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    150    struct nv50_program *vp = nv50->vertprog;
    151 
    152    if (!nv50_program_validate(nv50, vp))
    153          return;
    154    nv50_program_update_context_state(nv50, vp, 0);
    155 
    156    BEGIN_NV04(push, NV50_3D(VP_ATTR_EN(0)), 2);
    157    PUSH_DATA (push, vp->vp.attrs[0]);
    158    PUSH_DATA (push, vp->vp.attrs[1]);
    159    BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_RESULT), 1);
    160    PUSH_DATA (push, vp->max_out);
    161    BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_TEMP), 1);
    162    PUSH_DATA (push, vp->max_gpr);
    163    BEGIN_NV04(push, NV50_3D(VP_START_ID), 1);
    164    PUSH_DATA (push, vp->code_base);
    165 }
    166 
    167 void
    168 nv50_fragprog_validate(struct nv50_context *nv50)
    169 {
    170    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    171    struct nv50_program *fp = nv50->fragprog;
    172 
    173    if (!nv50_program_validate(nv50, fp))
    174          return;
    175    nv50_program_update_context_state(nv50, fp, 1);
    176 
    177    BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
    178    PUSH_DATA (push, fp->max_gpr);
    179    BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1);
    180    PUSH_DATA (push, fp->max_out);
    181    BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1);
    182    PUSH_DATA (push, fp->fp.flags[0]);
    183    BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1);
    184    PUSH_DATA (push, fp->fp.flags[1]);
    185    BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);
    186    PUSH_DATA (push, fp->code_base);
    187 }
    188 
    189 void
    190 nv50_gmtyprog_validate(struct nv50_context *nv50)
    191 {
    192    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    193    struct nv50_program *gp = nv50->gmtyprog;
    194 
    195    if (gp) {
    196       BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1);
    197       PUSH_DATA (push, gp->max_gpr);
    198       BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1);
    199       PUSH_DATA (push, gp->max_out);
    200       BEGIN_NV04(push, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);
    201       PUSH_DATA (push, gp->gp.prim_type);
    202       BEGIN_NV04(push, NV50_3D(GP_VERTEX_OUTPUT_COUNT), 1);
    203       PUSH_DATA (push, gp->gp.vert_count);
    204       BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
    205       PUSH_DATA (push, gp->code_base);
    206 
    207       nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
    208    }
    209    nv50_program_update_context_state(nv50, gp, 2);
    210 
    211    /* GP_ENABLE is updated in linkage validation */
    212 }
    213 
    214 static void
    215 nv50_sprite_coords_validate(struct nv50_context *nv50)
    216 {
    217    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    218    uint32_t pntc[8], mode;
    219    struct nv50_program *fp = nv50->fragprog;
    220    unsigned i, c;
    221    unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;
    222 
    223    if (!nv50->rast->pipe.point_quad_rasterization) {
    224       if (nv50->state.point_sprite) {
    225          BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
    226          for (i = 0; i < 8; ++i)
    227             PUSH_DATA(push, 0);
    228 
    229          nv50->state.point_sprite = FALSE;
    230       }
    231       return;
    232    } else {
    233       nv50->state.point_sprite = TRUE;
    234    }
    235 
    236    memset(pntc, 0, sizeof(pntc));
    237 
    238    for (i = 0; i < fp->in_nr; i++) {
    239       unsigned n = util_bitcount(fp->in[i].mask);
    240 
    241       if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
    242          m += n;
    243          continue;
    244       }
    245       if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {
    246          m += n;
    247          continue;
    248       }
    249 
    250       for (c = 0; c < 4; ++c) {
    251          if (fp->in[i].mask & (1 << c)) {
    252             pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
    253             ++m;
    254          }
    255       }
    256    }
    257 
    258    if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
    259       mode = 0x00;
    260    else
    261       mode = 0x10;
    262 
    263    BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1);
    264    PUSH_DATA (push, mode);
    265 
    266    BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
    267    PUSH_DATAp(push, pntc, 8);
    268 }
    269 
    270 /* Validate state derived from shaders and the rasterizer cso. */
    271 void
    272 nv50_validate_derived_rs(struct nv50_context *nv50)
    273 {
    274    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    275    uint32_t color, psize;
    276 
    277    nv50_sprite_coords_validate(nv50);
    278 
    279    if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
    280       nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
    281       BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
    282       PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
    283    }
    284 
    285    if (nv50->dirty & NV50_NEW_FRAGPROG)
    286       return;
    287    psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
    288    color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;
    289 
    290    if (nv50->rast->pipe.clamp_vertex_color)
    291       color |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
    292 
    293    if (color != nv50->state.semantic_color) {
    294       nv50->state.semantic_color = color;
    295       BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 1);
    296       PUSH_DATA (push, color);
    297    }
    298 
    299    if (nv50->rast->pipe.point_size_per_vertex)
    300       psize |= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
    301 
    302    if (psize != nv50->state.semantic_psize) {
    303       nv50->state.semantic_psize = psize;
    304       BEGIN_NV04(push, NV50_3D(SEMANTIC_PTSZ), 1);
    305       PUSH_DATA (push, psize);
    306    }
    307 }
    308 
    309 static int
    310 nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
    311               struct nv50_varying *in, struct nv50_varying *out)
    312 {
    313    int c;
    314    uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
    315 
    316    for (c = 0; c < 4; ++c) {
    317       if (mf & 1) {
    318          if (in->linear)
    319             lin[mid / 32] |= 1 << (mid % 32);
    320          if (mv & 1)
    321             map[mid] = oid;
    322          else
    323          if (c == 3)
    324             map[mid] |= 1;
    325          ++mid;
    326       }
    327 
    328       oid += mv & 1;
    329       mf >>= 1;
    330       mv >>= 1;
    331    }
    332 
    333    return mid;
    334 }
    335 
    336 void
    337 nv50_fp_linkage_validate(struct nv50_context *nv50)
    338 {
    339    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    340    struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
    341    struct nv50_program *fp = nv50->fragprog;
    342    struct nv50_varying dummy;
    343    int i, n, c, m;
    344    uint32_t primid = 0;
    345    uint32_t psiz = 0x000;
    346    uint32_t interp = fp->fp.interp;
    347    uint32_t colors = fp->fp.colors;
    348    uint32_t lin[4];
    349    uint8_t map[64];
    350    uint8_t so_map[64];
    351 
    352    if (!(nv50->dirty & (NV50_NEW_VERTPROG |
    353                         NV50_NEW_FRAGPROG |
    354                         NV50_NEW_GMTYPROG))) {
    355       uint8_t bfc, ffc;
    356       ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
    357       bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
    358          >> 8;
    359       if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))
    360          return;
    361    }
    362 
    363    memset(lin, 0x00, sizeof(lin));
    364 
    365    /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
    366     *  or is it the first byte ?
    367     */
    368    memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));
    369 
    370    dummy.mask = 0xf; /* map all components of HPOS */
    371    dummy.linear = 0;
    372    m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
    373 
    374    for (c = 0; c < vp->vp.clpd_nr; ++c)
    375       map[m++] = vp->vp.clpd[c / 4] + (c % 4);
    376 
    377    colors |= m << 8; /* adjust BFC0 id */
    378 
    379    dummy.mask = 0x0;
    380 
    381    /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
    382    if (nv50->rast->pipe.light_twoside) {
    383       for (i = 0; i < 2; ++i) {
    384          n = vp->vp.bfc[i];
    385          if (fp->vp.bfc[i] >= fp->in_nr)
    386             continue;
    387          m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]],
    388                            (n < vp->out_nr) ? &vp->out[n] : &dummy);
    389       }
    390    }
    391    colors += m - 4; /* adjust FFC0 id */
    392    interp |= m << 8; /* set map id where 'normal' FP inputs start */
    393 
    394    for (i = 0; i < fp->in_nr; ++i) {
    395       for (n = 0; n < vp->out_nr; ++n)
    396          if (vp->out[n].sn == fp->in[i].sn &&
    397              vp->out[n].si == fp->in[i].si)
    398             break;
    399       m = nv50_vec4_map(map, m, lin,
    400                         &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
    401    }
    402 
    403    /* PrimitiveID either is replaced by the system value, or
    404     * written by the geometry shader into an output register
    405     */
    406    if (fp->gp.primid < 0x80) {
    407       primid = m;
    408       map[m++] = vp->gp.primid;
    409    }
    410 
    411    if (nv50->rast->pipe.point_size_per_vertex) {
    412       psiz = (m << 4) | 1;
    413       map[m++] = vp->vp.psiz;
    414    }
    415 
    416    if (nv50->rast->pipe.clamp_vertex_color)
    417       colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
    418 
    419    if (unlikely(vp->so)) {
    420       /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
    421        * gets written.
    422        *
    423        * TODO:
    424        * Inverting vp->so->map (output -> offset) would probably speed this up.
    425        */
    426       memset(so_map, 0, sizeof(so_map));
    427       for (i = 0; i < vp->so->map_size; ++i) {
    428          if (vp->so->map[i] == 0xff)
    429             continue;
    430          for (c = 0; c < m; ++c)
    431             if (map[c] == vp->so->map[i] && !so_map[c])
    432                break;
    433          if (c == m) {
    434             c = m;
    435             map[m++] = vp->so->map[i];
    436          }
    437          so_map[c] = 0x80 | i;
    438       }
    439       for (c = m; c & 3; ++c)
    440          so_map[c] = 0;
    441    }
    442 
    443    n = (m + 3) / 4;
    444    assert(m <= 64);
    445 
    446    if (unlikely(nv50->gmtyprog)) {
    447       BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP_SIZE), 1);
    448       PUSH_DATA (push, m);
    449       BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP(0)), n);
    450       PUSH_DATAp(push, map, n);
    451    } else {
    452       BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
    453       PUSH_DATA (push, vp->vp.attrs[2]);
    454 
    455       BEGIN_NV04(push, NV50_3D(SEMANTIC_PRIM_ID), 1);
    456       PUSH_DATA (push, primid);
    457 
    458       BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
    459       PUSH_DATA (push, m);
    460       BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
    461       PUSH_DATAp(push, map, n);
    462    }
    463 
    464    BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 4);
    465    PUSH_DATA (push, colors);
    466    PUSH_DATA (push, (vp->vp.clpd_nr << 8) | 4);
    467    PUSH_DATA (push, 0);
    468    PUSH_DATA (push, psiz);
    469 
    470    BEGIN_NV04(push, NV50_3D(FP_INTERPOLANT_CTRL), 1);
    471    PUSH_DATA (push, interp);
    472 
    473    nv50->state.interpolant_ctrl = interp;
    474 
    475    nv50->state.semantic_color = colors;
    476    nv50->state.semantic_psize = psiz;
    477 
    478    BEGIN_NV04(push, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4);
    479    PUSH_DATAp(push, lin, 4);
    480 
    481    BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
    482    PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);
    483 
    484    if (vp->so) {
    485       BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
    486       PUSH_DATAp(push, so_map, n);
    487    }
    488 }
    489 
    490 static int
    491 nv50_vp_gp_mapping(uint8_t *map, int m,
    492                    struct nv50_program *vp, struct nv50_program *gp)
    493 {
    494    int i, j, c;
    495 
    496    for (i = 0; i < gp->in_nr; ++i) {
    497       uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
    498 
    499       for (j = 0; j < vp->out_nr; ++j) {
    500          if (vp->out[j].sn == gp->in[i].sn &&
    501              vp->out[j].si == gp->in[i].si) {
    502             mv = vp->out[j].mask;
    503             oid = vp->out[j].hw;
    504             break;
    505          }
    506       }
    507 
    508       for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
    509          if (mg & mv & 1)
    510             map[m++] = oid;
    511          else
    512          if (mg & 1)
    513             map[m++] = (c == 3) ? 0x41 : 0x40;
    514          oid += mv & 1;
    515       }
    516    }
    517    return m;
    518 }
    519 
    520 void
    521 nv50_gp_linkage_validate(struct nv50_context *nv50)
    522 {
    523    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    524    struct nv50_program *vp = nv50->vertprog;
    525    struct nv50_program *gp = nv50->gmtyprog;
    526    int m = 0;
    527    int n;
    528    uint8_t map[64];
    529 
    530    if (!gp)
    531       return;
    532    memset(map, 0, sizeof(map));
    533 
    534    m = nv50_vp_gp_mapping(map, m, vp, gp);
    535 
    536    n = (m + 3) / 4;
    537 
    538    BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
    539    PUSH_DATA (push, vp->vp.attrs[2] | gp->vp.attrs[2]);
    540 
    541    BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
    542    PUSH_DATA (push, m);
    543    BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
    544    PUSH_DATAp(push, map, n);
    545 }
    546 
    547 void
    548 nv50_stream_output_validate(struct nv50_context *nv50)
    549 {
    550    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    551    struct nv50_stream_output_state *so;
    552    uint32_t ctrl;
    553    unsigned i;
    554    unsigned prims = ~0;
    555 
    556    so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;
    557 
    558    BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
    559    PUSH_DATA (push, 0);
    560    if (!so || !nv50->num_so_targets) {
    561       if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
    562          BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
    563          PUSH_DATA (push, 0);
    564       }
    565       BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
    566       PUSH_DATA (push, 1);
    567       return;
    568    }
    569 
    570    /* previous TFB needs to complete */
    571    if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
    572       BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
    573       PUSH_DATA (push, 0);
    574    }
    575 
    576    ctrl = so->ctrl;
    577    if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
    578       ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;
    579 
    580    BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
    581    PUSH_DATA (push, ctrl);
    582 
    583    nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
    584 
    585    for (i = 0; i < nv50->num_so_targets; ++i) {
    586       struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
    587       struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
    588 
    589       const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
    590 
    591       if (n == 4 && !targ->clean)
    592          nv84_query_fifo_wait(push, targ->pq);
    593       BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
    594       PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
    595       PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
    596       PUSH_DATA (push, so->num_attribs[i]);
    597       if (n == 4) {
    598          PUSH_DATA(push, targ->pipe.buffer_size);
    599 
    600          BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
    601          if (!targ->clean) {
    602             assert(targ->pq);
    603             nv50_query_pushbuf_submit(push, targ->pq, 0x4);
    604          } else {
    605             PUSH_DATA(push, 0);
    606             targ->clean = FALSE;
    607          }
    608       } else {
    609          const unsigned limit = targ->pipe.buffer_size /
    610             (so->stride[i] * nv50->state.prim_size);
    611          prims = MIN2(prims, limit);
    612       }
    613       BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
    614    }
    615    if (prims != ~0) {
    616       BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
    617       PUSH_DATA (push, prims);
    618    }
    619    BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
    620    PUSH_DATA (push, 1);
    621    BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
    622    PUSH_DATA (push, 1);
    623 }
    624