Home | History | Annotate | Download | only in nvc0
      1 /*
      2  * Copyright 2013 Nouveau Project
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * Authors: Christoph Bumiller, Samuel Pitoiset
     23  */
     24 
     25 #include "nvc0/nvc0_context.h"
     26 
     27 #include "nvc0/nvc0_compute.xml.h"
     28 
     29 int
     30 nvc0_screen_compute_setup(struct nvc0_screen *screen,
     31                           struct nouveau_pushbuf *push)
     32 {
     33    struct nouveau_object *chan = screen->base.channel;
     34    struct nouveau_device *dev = screen->base.device;
     35    uint32_t obj_class;
     36    int ret;
     37    int i;
     38 
     39    switch (dev->chipset & ~0xf) {
     40    case 0xc0:
     41    case 0xd0:
     42       /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
     43        * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
     44       obj_class = NVC0_COMPUTE_CLASS;
     45       break;
     46    default:
     47       NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
     48       return -1;
     49    }
     50 
     51    ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
     52                             &screen->compute);
     53    if (ret) {
     54       NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
     55       return ret;
     56    }
     57 
     58    BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
     59    PUSH_DATA (push, screen->compute->oclass);
     60 
     61    /* hardware limit */
     62    BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
     63    PUSH_DATA (push, screen->mp_count);
     64    BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
     65    PUSH_DATA (push, 0xf);
     66 
     67    BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
     68    PUSH_DATA (push, 0x8000);
     69 
     70    /* global memory setup */
     71    BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
     72    PUSH_DATA (push, 0);
     73    BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
     74    for (i = 0; i <= 0xff; i++)
     75       PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
     76    BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
     77    PUSH_DATA (push, 1);
     78 
     79    /* local memory and cstack setup */
     80    BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
     81    PUSH_DATAh(push, screen->tls->offset);
     82    PUSH_DATA (push, screen->tls->offset);
     83    BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
     84    PUSH_DATAh(push, screen->tls->size);
     85    PUSH_DATA (push, screen->tls->size);
     86    BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
     87    PUSH_DATA (push, 0);
     88    BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
     89    PUSH_DATA (push, 0xff << 24);
     90 
     91    /* shared memory setup */
     92    BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
     93    PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
     94    BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
     95    PUSH_DATA (push, 0xfe << 24);
     96    BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
     97    PUSH_DATA (push, 0);
     98 
     99    /* code segment setup */
    100    BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
    101    PUSH_DATAh(push, screen->text->offset);
    102    PUSH_DATA (push, screen->text->offset);
    103 
    104    /* textures */
    105    BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
    106    PUSH_DATAh(push, screen->txc->offset);
    107    PUSH_DATA (push, screen->txc->offset);
    108    PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
    109 
    110    /* samplers */
    111    BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
    112    PUSH_DATAh(push, screen->txc->offset + 65536);
    113    PUSH_DATA (push, screen->txc->offset + 65536);
    114    PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
    115 
    116    /* MS sample coordinate offsets */
    117    BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
    118    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
    119    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
    120    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
    121    BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8);
    122    PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
    123    PUSH_DATA (push, 0); /* 0 */
    124    PUSH_DATA (push, 0);
    125    PUSH_DATA (push, 1); /* 1 */
    126    PUSH_DATA (push, 0);
    127    PUSH_DATA (push, 0); /* 2 */
    128    PUSH_DATA (push, 1);
    129    PUSH_DATA (push, 1); /* 3 */
    130    PUSH_DATA (push, 1);
    131    PUSH_DATA (push, 2); /* 4 */
    132    PUSH_DATA (push, 0);
    133    PUSH_DATA (push, 3); /* 5 */
    134    PUSH_DATA (push, 0);
    135    PUSH_DATA (push, 2); /* 6 */
    136    PUSH_DATA (push, 1);
    137    PUSH_DATA (push, 3); /* 7 */
    138    PUSH_DATA (push, 1);
    139 
    140    return 0;
    141 }
    142 
    143 static void
    144 nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
    145 {
    146    bool need_flush = nvc0_validate_tsc(nvc0, 5);
    147    if (need_flush) {
    148       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
    149       PUSH_DATA (nvc0->base.pushbuf, 0);
    150    }
    151 
    152    /* Invalidate all 3D samplers because they are aliased. */
    153    for (int s = 0; s < 5; s++)
    154       nvc0->samplers_dirty[s] = ~0;
    155    nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
    156 }
    157 
    158 static void
    159 nvc0_compute_validate_textures(struct nvc0_context *nvc0)
    160 {
    161    bool need_flush = nvc0_validate_tic(nvc0, 5);
    162    if (need_flush) {
    163       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
    164       PUSH_DATA (nvc0->base.pushbuf, 0);
    165    }
    166 
    167    /* Invalidate all 3D textures because they are aliased. */
    168    for (int s = 0; s < 5; s++) {
    169       for (int i = 0; i < nvc0->num_textures[s]; i++)
    170          nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
    171       nvc0->textures_dirty[s] = ~0;
    172    }
    173    nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
    174 }
    175 
    176 static inline void
    177 nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)
    178 {
    179    int s;
    180 
    181    /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
    182    for (s = 0; s < 5; s++) {
    183       nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
    184       nvc0->state.uniform_buffer_bound[s] = 0;
    185    }
    186    nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
    187 }
    188 
    189 static void
    190 nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
    191 {
    192    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    193    const int s = 5;
    194 
    195    while (nvc0->constbuf_dirty[s]) {
    196       int i = ffs(nvc0->constbuf_dirty[s]) - 1;
    197       nvc0->constbuf_dirty[s] &= ~(1 << i);
    198 
    199       if (nvc0->constbuf[s][i].user) {
    200          struct nouveau_bo *bo = nvc0->screen->uniform_bo;
    201          const unsigned base = NVC0_CB_USR_INFO(s);
    202          const unsigned size = nvc0->constbuf[s][0].size;
    203          assert(i == 0); /* we really only want OpenGL uniforms here */
    204          assert(nvc0->constbuf[s][0].u.data);
    205 
    206          if (nvc0->state.uniform_buffer_bound[s] < size) {
    207             nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
    208 
    209             BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
    210             PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
    211             PUSH_DATAh(push, bo->offset + base);
    212             PUSH_DATA (push, bo->offset + base);
    213             BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
    214             PUSH_DATA (push, (0 << 8) | 1);
    215          }
    216          nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
    217                          base, nvc0->state.uniform_buffer_bound[s],
    218                          0, (size + 3) / 4,
    219                          nvc0->constbuf[s][0].u.data);
    220       } else {
    221          struct nv04_resource *res =
    222             nv04_resource(nvc0->constbuf[s][i].u.buf);
    223          if (res) {
    224             BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
    225             PUSH_DATA (push, nvc0->constbuf[s][i].size);
    226             PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
    227             PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
    228             BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
    229             PUSH_DATA (push, (i << 8) | 1);
    230 
    231             BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
    232 
    233             res->cb_bindings[s] |= 1 << i;
    234          } else {
    235             BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
    236             PUSH_DATA (push, (i << 8) | 0);
    237          }
    238          if (i == 0)
    239             nvc0->state.uniform_buffer_bound[s] = 0;
    240       }
    241    }
    242 
    243    nvc0_compute_invalidate_constbufs(nvc0);
    244 
    245    BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
    246    PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
    247 }
    248 
    249 static void
    250 nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
    251 {
    252    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    253    struct nvc0_screen *screen = nvc0->screen;
    254 
    255    BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
    256    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
    257    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
    258    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
    259    BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
    260    PUSH_DATA (push, (15 << 8) | 1);
    261 
    262    nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;
    263 }
    264 
    265 static void
    266 nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
    267 {
    268    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    269    struct nvc0_screen *screen = nvc0->screen;
    270    const int s = 5;
    271    int i;
    272 
    273    BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
    274    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
    275    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
    276    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
    277    BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
    278    PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
    279 
    280    for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
    281       if (nvc0->buffers[s][i].buffer) {
    282          struct nv04_resource *res =
    283             nv04_resource(nvc0->buffers[s][i].buffer);
    284          PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
    285          PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
    286          PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
    287          PUSH_DATA (push, 0);
    288          BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
    289          util_range_add(&res->valid_buffer_range,
    290                         nvc0->buffers[s][i].buffer_offset,
    291                         nvc0->buffers[s][i].buffer_offset +
    292                         nvc0->buffers[s][i].buffer_size);
    293       } else {
    294          PUSH_DATA (push, 0);
    295          PUSH_DATA (push, 0);
    296          PUSH_DATA (push, 0);
    297          PUSH_DATA (push, 0);
    298       }
    299    }
    300 }
    301 
    302 void
    303 nvc0_compute_validate_globals(struct nvc0_context *nvc0)
    304 {
    305    unsigned i;
    306 
    307    for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
    308         ++i) {
    309       struct pipe_resource *res = *util_dynarray_element(
    310          &nvc0->global_residents, struct pipe_resource *, i);
    311       if (res)
    312          nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,
    313                            nv04_resource(res), NOUVEAU_BO_RDWR);
    314    }
    315 }
    316 
    317 static inline void
    318 nvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s)
    319 {
    320    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    321    int i;
    322 
    323    for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
    324       if (s == 5)
    325          BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
    326       else
    327          BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
    328       PUSH_DATA(push, 0);
    329       PUSH_DATA(push, 0);
    330       PUSH_DATA(push, 0);
    331       PUSH_DATA(push, 0);
    332       PUSH_DATA(push, 0x14000);
    333       PUSH_DATA(push, 0);
    334    }
    335 }
    336 
    337 static void
    338 nvc0_compute_validate_surfaces(struct nvc0_context *nvc0)
    339 {
    340    /* TODO: Invalidating both 3D and CP surfaces before validating surfaces for
    341     * compute is probably not really necessary, but we didn't find any better
    342     * solutions for now. This fixes some invalidation issues when compute and
    343     * fragment shaders are used inside the same context. Anyway, we definitely
    344     * have invalidation issues between 3D and CP for other resources like SSBO
    345     * and atomic counters. */
    346    nvc0_compute_invalidate_surfaces(nvc0, 4);
    347    nvc0_compute_invalidate_surfaces(nvc0, 5);
    348 
    349    nvc0_validate_suf(nvc0, 5);
    350 
    351    /* Invalidate all FRAGMENT images because they are aliased with COMPUTE. */
    352    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
    353    nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES;
    354    nvc0->images_dirty[4] |= nvc0->images_valid[4];
    355 }
    356 
    357 static struct nvc0_state_validate
    358 validate_list_cp[] = {
    359    { nvc0_compprog_validate,              NVC0_NEW_CP_PROGRAM     },
    360    { nvc0_compute_validate_constbufs,     NVC0_NEW_CP_CONSTBUF    },
    361    { nvc0_compute_validate_driverconst,   NVC0_NEW_CP_DRIVERCONST },
    362    { nvc0_compute_validate_buffers,       NVC0_NEW_CP_BUFFERS     },
    363    { nvc0_compute_validate_textures,      NVC0_NEW_CP_TEXTURES    },
    364    { nvc0_compute_validate_samplers,      NVC0_NEW_CP_SAMPLERS    },
    365    { nvc0_compute_validate_globals,       NVC0_NEW_CP_GLOBALS     },
    366    { nvc0_compute_validate_surfaces,      NVC0_NEW_CP_SURFACES    },
    367 };
    368 
    369 static bool
    370 nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
    371 {
    372    bool ret;
    373 
    374    ret = nvc0_state_validate(nvc0, mask, validate_list_cp,
    375                              ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,
    376                              nvc0->bufctx_cp);
    377 
    378    if (unlikely(nvc0->state.flushed))
    379       nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
    380    return ret;
    381 }
    382 
    383 static void
    384 nvc0_compute_upload_input(struct nvc0_context *nvc0,
    385                           const struct pipe_grid_info *info)
    386 {
    387    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    388    struct nvc0_screen *screen = nvc0->screen;
    389    struct nvc0_program *cp = nvc0->compprog;
    390 
    391    if (cp->parm_size) {
    392       struct nouveau_bo *bo = screen->uniform_bo;
    393       const unsigned base = NVC0_CB_USR_INFO(5);
    394 
    395       BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
    396       PUSH_DATA (push, align(cp->parm_size, 0x100));
    397       PUSH_DATAh(push, bo->offset + base);
    398       PUSH_DATA (push, bo->offset + base);
    399       BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
    400       PUSH_DATA (push, (0 << 8) | 1);
    401       /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
    402       BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
    403       PUSH_DATA (push, 0);
    404       PUSH_DATAp(push, info->input, cp->parm_size / 4);
    405 
    406       nvc0_compute_invalidate_constbufs(nvc0);
    407    }
    408 
    409    BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
    410    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
    411    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
    412    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
    413 
    414    BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
    415    /* (7) as we only upload work_dim on nvc0, the rest uses special regs */
    416    PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));
    417    PUSH_DATA (push, info->work_dim);
    418 
    419    BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
    420    PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
    421 }
    422 
    423 void
    424 nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    425 {
    426    struct nvc0_context *nvc0 = nvc0_context(pipe);
    427    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    428    struct nvc0_program *cp = nvc0->compprog;
    429    int ret;
    430 
    431    ret = !nvc0_state_validate_cp(nvc0, ~0);
    432    if (ret) {
    433       NOUVEAU_ERR("Failed to launch grid !\n");
    434       return;
    435    }
    436 
    437    nvc0_compute_upload_input(nvc0, info);
    438 
    439    BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
    440    PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
    441 
    442    BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
    443    PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10));
    444    PUSH_DATA (push, 0);
    445    PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
    446 
    447    BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
    448    PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
    449    PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
    450    PUSH_DATA (push, cp->num_barriers);
    451    BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
    452    PUSH_DATA (push, cp->num_gprs);
    453 
    454    /* launch preliminary setup */
    455    BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
    456    PUSH_DATA (push, 0x1);
    457    BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
    458    PUSH_DATA (push, 0);
    459    BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
    460    PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
    461 
    462    /* block setup */
    463    BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
    464    PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
    465    PUSH_DATA (push, info->block[2]);
    466 
    467    if (unlikely(info->indirect)) {
    468       struct nv04_resource *res = nv04_resource(info->indirect);
    469       uint32_t offset = res->offset + info->indirect_offset;
    470       unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;
    471 
    472       nouveau_pushbuf_space(push, 16, 0, 1);
    473       PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
    474       PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
    475       nouveau_pushbuf_data(push, res->bo, offset,
    476                            NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
    477    } else {
    478       /* grid setup */
    479       BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
    480       PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
    481       PUSH_DATA (push, info->grid[2]);
    482 
    483       /* kernel launching */
    484       BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
    485       PUSH_DATA (push, 0);
    486       BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
    487       PUSH_DATA (push, 0);
    488       BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
    489       PUSH_DATA (push, 0x1000);
    490       BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
    491       PUSH_DATA (push, 0);
    492       BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
    493       PUSH_DATA (push, 0x1);
    494    }
    495 
    496    /* TODO: Not sure if this is really necessary. */
    497    nvc0_compute_invalidate_surfaces(nvc0, 5);
    498    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
    499    nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
    500    nvc0->images_dirty[5] |= nvc0->images_valid[5];
    501 }
    502