Home | History | Annotate | Download | only in nv50
      1 /*
      2  * Copyright 2012 Francisco Jerez
      3  * Copyright 2015 Samuel Pitoiset
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining
      6  * a copy of this software and associated documentation files (the
      7  * "Software"), to deal in the Software without restriction, including
      8  * without limitation the rights to use, copy, modify, merge, publish,
      9  * distribute, sublicense, and/or sell copies of the Software, and to
     10  * permit persons to whom the Software is furnished to do so, subject to
     11  * the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the
     14  * next paragraph) shall be included in all copies or substantial
     15  * portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  *
     25  */
     26 
     27 #include "nv50/nv50_context.h"
     28 #include "nv50/nv50_compute.xml.h"
     29 
     30 #include "codegen/nv50_ir_driver.h"
     31 
     32 int
     33 nv50_screen_compute_setup(struct nv50_screen *screen,
     34                           struct nouveau_pushbuf *push)
     35 {
     36    struct nouveau_device *dev = screen->base.device;
     37    struct nouveau_object *chan = screen->base.channel;
     38    struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
     39    unsigned obj_class;
     40    int i, ret;
     41 
     42    switch (dev->chipset & 0xf0) {
     43    case 0x50:
     44    case 0x80:
     45    case 0x90:
     46       obj_class = NV50_COMPUTE_CLASS;
     47       break;
     48    case 0xa0:
     49       switch (dev->chipset) {
     50       case 0xa3:
     51       case 0xa5:
     52       case 0xa8:
     53          obj_class = NVA3_COMPUTE_CLASS;
     54          break;
     55       default:
     56          obj_class = NV50_COMPUTE_CLASS;
     57          break;
     58       }
     59       break;
     60    default:
     61       NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
     62       return -1;
     63    }
     64 
     65    ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
     66                             &screen->compute);
     67    if (ret)
     68       return ret;
     69 
     70    BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
     71    PUSH_DATA (push, screen->compute->handle);
     72 
     73    BEGIN_NV04(push, NV50_CP(UNK02A0), 1);
     74    PUSH_DATA (push, 1);
     75    BEGIN_NV04(push, NV50_CP(DMA_STACK), 1);
     76    PUSH_DATA (push, fifo->vram);
     77    BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2);
     78    PUSH_DATAh(push, screen->stack_bo->offset);
     79    PUSH_DATA (push, screen->stack_bo->offset);
     80    BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1);
     81    PUSH_DATA (push, 4);
     82 
     83    BEGIN_NV04(push, NV50_CP(UNK0290), 1);
     84    PUSH_DATA (push, 1);
     85    BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1);
     86    PUSH_DATA (push, 1);
     87    BEGIN_NV04(push, NV50_CP(REG_MODE), 1);
     88    PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
     89    BEGIN_NV04(push, NV50_CP(UNK0384), 1);
     90    PUSH_DATA (push, 0x100);
     91    BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1);
     92    PUSH_DATA (push, fifo->vram);
     93 
     94    for (i = 0; i < 15; i++) {
     95       BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2);
     96       PUSH_DATA (push, 0);
     97       PUSH_DATA (push, 0);
     98       BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1);
     99       PUSH_DATA (push, 0);
    100       BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1);
    101       PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
    102    }
    103 
    104    BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
    105    PUSH_DATA (push, 0);
    106    PUSH_DATA (push, 0);
    107    BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1);
    108    PUSH_DATA (push, ~0);
    109    BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1);
    110    PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
    111 
    112    BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1);
    113    PUSH_DATA (push, 7);
    114    BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1);
    115    PUSH_DATA (push, 1);
    116    BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1);
    117    PUSH_DATA (push, 7);
    118    BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1);
    119    PUSH_DATA (push, 1);
    120    BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
    121    PUSH_DATA (push, 0);
    122 
    123    BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1);
    124    PUSH_DATA (push, fifo->vram);
    125    BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1);
    126    PUSH_DATA (push, 0x54);
    127    BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1);
    128    PUSH_DATA (push, 0);
    129 
    130    BEGIN_NV04(push, NV50_CP(DMA_TIC), 1);
    131    PUSH_DATA (push, fifo->vram);
    132    BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3);
    133    PUSH_DATAh(push, screen->txc->offset);
    134    PUSH_DATA (push, screen->txc->offset);
    135    PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
    136 
    137    BEGIN_NV04(push, NV50_CP(DMA_TSC), 1);
    138    PUSH_DATA (push, fifo->vram);
    139    BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3);
    140    PUSH_DATAh(push, screen->txc->offset + 65536);
    141    PUSH_DATA (push, screen->txc->offset + 65536);
    142    PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
    143 
    144    BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1);
    145    PUSH_DATA (push, fifo->vram);
    146 
    147    BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1);
    148    PUSH_DATA (push, fifo->vram);
    149    BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2);
    150    PUSH_DATAh(push, screen->tls_bo->offset + 65536);
    151    PUSH_DATA (push, screen->tls_bo->offset + 65536);
    152    BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
    153    PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
    154 
    155    return 0;
    156 }
    157 
    158 static void
    159 nv50_compute_validate_globals(struct nv50_context *nv50)
    160 {
    161    unsigned i;
    162 
    163    for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
    164         ++i) {
    165       struct pipe_resource *res = *util_dynarray_element(
    166          &nv50->global_residents, struct pipe_resource *, i);
    167       if (res)
    168          nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
    169                                   nv04_resource(res), NOUVEAU_BO_RDWR);
    170    }
    171 }
    172 
    173 static struct nv50_state_validate
    174 validate_list_cp[] = {
    175    { nv50_compprog_validate,              NV50_NEW_CP_PROGRAM     },
    176    { nv50_compute_validate_globals,       NV50_NEW_CP_GLOBALS     },
    177 };
    178 
    179 static bool
    180 nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask)
    181 {
    182    bool ret;
    183 
    184    /* TODO: validate textures, samplers, surfaces */
    185    ret = nv50_state_validate(nv50, mask, validate_list_cp,
    186                              ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp,
    187                              nv50->bufctx_cp);
    188 
    189    if (unlikely(nv50->state.flushed))
    190       nv50_bufctx_fence(nv50->bufctx_cp, true);
    191    return ret;
    192 }
    193 
    194 static void
    195 nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
    196 {
    197    struct nv50_screen *screen = nv50->screen;
    198    struct nouveau_pushbuf *push = screen->base.pushbuf;
    199    unsigned size = align(nv50->compprog->parm_size, 0x4);
    200 
    201    BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
    202    PUSH_DATA (push, (size / 4) << 8);
    203 
    204    if (size) {
    205       struct nouveau_mm_allocation *mm;
    206       struct nouveau_bo *bo = NULL;
    207       unsigned offset;
    208 
    209       mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
    210       assert(mm);
    211 
    212       nouveau_bo_map(bo, 0, screen->base.client);
    213       memcpy(bo->map + offset, input, size);
    214 
    215       nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
    216       nouveau_pushbuf_bufctx(push, nv50->bufctx);
    217       nouveau_pushbuf_validate(push);
    218 
    219       BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4);
    220       nouveau_pushbuf_data(push, bo, offset, size);
    221 
    222       nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
    223       nouveau_bo_ref(NULL, &bo);
    224       nouveau_bufctx_reset(nv50->bufctx, 0);
    225    }
    226 }
    227 
    228 static uint32_t
    229 nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
    230 {
    231    struct nv50_program *prog = nv50->compprog;
    232    const struct nv50_ir_prog_symbol *syms =
    233       (const struct nv50_ir_prog_symbol *)prog->cp.syms;
    234    unsigned i;
    235 
    236    for (i = 0; i < prog->cp.num_syms; ++i) {
    237       if (syms[i].label == label)
    238          return prog->code_base + syms[i].offset;
    239    }
    240    return prog->code_base; /* no symbols or symbol not found */
    241 }
    242 
    243 void
    244 nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    245 {
    246    struct nv50_context *nv50 = nv50_context(pipe);
    247    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    248    unsigned block_size = info->block[0] * info->block[1] * info->block[2];
    249    struct nv50_program *cp = nv50->compprog;
    250    bool ret;
    251 
    252    ret = !nv50_state_validate_cp(nv50, ~0);
    253    if (ret) {
    254       NOUVEAU_ERR("Failed to launch grid !\n");
    255       return;
    256    }
    257 
    258    nv50_compute_upload_input(nv50, info->input);
    259 
    260    BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
    261    PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
    262 
    263    BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
    264    PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
    265    BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1);
    266    PUSH_DATA (push, cp->max_gpr);
    267 
    268    /* grid/block setup */
    269    BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2);
    270    PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
    271    PUSH_DATA (push, info->block[2]);
    272    BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1);
    273    PUSH_DATA (push, 1 << 16 | block_size);
    274    BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1);
    275    PUSH_DATA (push, 1);
    276    BEGIN_NV04(push, NV50_CP(GRIDDIM), 1);
    277    PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
    278    BEGIN_NV04(push, NV50_CP(GRIDID), 1);
    279    PUSH_DATA (push, 1);
    280 
    281    /* kernel launching */
    282    BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
    283    PUSH_DATA (push, 0);
    284    BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
    285    PUSH_DATA (push, 0);
    286 
    287    /* bind a compute shader clobbers fragment shader state */
    288    nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
    289 }
    290