Home | History | Annotate | Download | only in nv50
      1 /*
      2  * Copyright 2010 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20  * SOFTWARE.
     21  */
     22 
     23 #include "nv50_program.h"
     24 #include "nv50_context.h"
     25 
     26 #include "codegen/nv50_ir_driver.h"
     27 
     28 static INLINE unsigned
     29 bitcount4(const uint32_t val)
     30 {
     31    static const uint8_t cnt[16]
     32    = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
     33    return cnt[val & 0xf];
     34 }
     35 
     36 static int
     37 nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
     38 {
     39    struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
     40    unsigned i, n, c;
     41 
     42    n = 0;
     43    for (i = 0; i < info->numInputs; ++i) {
     44       prog->in[i].id = i;
     45       prog->in[i].sn = info->in[i].sn;
     46       prog->in[i].si = info->in[i].si;
     47       prog->in[i].hw = n;
     48       prog->in[i].mask = info->in[i].mask;
     49 
     50       prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
     51 
     52       for (c = 0; c < 4; ++c)
     53          if (info->in[i].mask & (1 << c))
     54             info->in[i].slot[c] = n++;
     55    }
     56    prog->in_nr = info->numInputs;
     57 
     58    for (i = 0; i < info->numSysVals; ++i) {
     59       switch (info->sv[i].sn) {
     60       case TGSI_SEMANTIC_INSTANCEID:
     61          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
     62          continue;
     63       case TGSI_SEMANTIC_VERTEXID:
     64          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
     65          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12;
     66          continue;
     67       default:
     68          break;
     69       }
     70    }
     71 
     72    /*
     73     * Corner case: VP has no inputs, but we will still need to submit data to
     74     * draw it. HW will shout at us and won't draw anything if we don't enable
     75     * any input, so let's just pretend it's the first one.
     76     */
     77    if (prog->vp.attrs[0] == 0 &&
     78        prog->vp.attrs[1] == 0 &&
     79        prog->vp.attrs[2] == 0)
     80       prog->vp.attrs[0] |= 0xf;
     81 
     82    /* VertexID before InstanceID */
     83    if (info->io.vertexId < info->numSysVals)
     84       info->sv[info->io.vertexId].slot[0] = n++;
     85    if (info->io.instanceId < info->numSysVals)
     86       info->sv[info->io.instanceId].slot[0] = n++;
     87 
     88    n = 0;
     89    for (i = 0; i < info->numOutputs; ++i) {
     90       switch (info->out[i].sn) {
     91       case TGSI_SEMANTIC_PSIZE:
     92          prog->vp.psiz = i;
     93          break;
     94       case TGSI_SEMANTIC_CLIPDIST:
     95          prog->vp.clpd[info->out[i].si] = n;
     96          break;
     97       case TGSI_SEMANTIC_EDGEFLAG:
     98          prog->vp.edgeflag = i;
     99          break;
    100       case TGSI_SEMANTIC_BCOLOR:
    101          prog->vp.bfc[info->out[i].si] = i;
    102          break;
    103       default:
    104          break;
    105       }
    106       prog->out[i].id = i;
    107       prog->out[i].sn = info->out[i].sn;
    108       prog->out[i].si = info->out[i].si;
    109       prog->out[i].hw = n;
    110       prog->out[i].mask = info->out[i].mask;
    111 
    112       for (c = 0; c < 4; ++c)
    113          if (info->out[i].mask & (1 << c))
    114             info->out[i].slot[c] = n++;
    115    }
    116    prog->out_nr = info->numOutputs;
    117    prog->max_out = n;
    118 
    119    if (prog->vp.psiz < info->numOutputs)
    120       prog->vp.psiz = prog->out[prog->vp.psiz].hw;
    121 
    122    return 0;
    123 }
    124 
    125 static int
    126 nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
    127 {
    128    struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
    129    unsigned i, n, m, c;
    130    unsigned nvary;
    131    unsigned nflat;
    132    unsigned nintp = 0;
    133 
    134    /* count recorded non-flat inputs */
    135    for (m = 0, i = 0; i < info->numInputs; ++i) {
    136       switch (info->in[i].sn) {
    137       case TGSI_SEMANTIC_POSITION:
    138       case TGSI_SEMANTIC_FACE:
    139          continue;
    140       default:
    141          m += info->in[i].flat ? 0 : 1;
    142          break;
    143       }
    144    }
    145    /* careful: id may be != i in info->in[prog->in[i].id] */
    146 
    147    /* Fill prog->in[] so that non-flat inputs are first and
    148     * kick out special inputs that don't use the RESULT_MAP.
    149     */
    150    for (n = 0, i = 0; i < info->numInputs; ++i) {
    151       if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
    152          prog->fp.interp |= info->in[i].mask << 24;
    153          for (c = 0; c < 4; ++c)
    154             if (info->in[i].mask & (1 << c))
    155                info->in[i].slot[c] = nintp++;
    156       } else
    157       if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
    158          info->in[i].slot[0] = 255;
    159       } else {
    160          unsigned j = info->in[i].flat ? m++ : n++;
    161 
    162          if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
    163             prog->vp.bfc[info->in[i].si] = j;
    164 
    165          prog->in[j].id = i;
    166          prog->in[j].mask = info->in[i].mask;
    167          prog->in[j].sn = info->in[i].sn;
    168          prog->in[j].si = info->in[i].si;
    169          prog->in[j].linear = info->in[i].linear;
    170 
    171          prog->in_nr++;
    172       }
    173    }
    174    if (!(prog->fp.interp & (8 << 24))) {
    175       ++nintp;
    176       prog->fp.interp |= 8 << 24;
    177    }
    178 
    179    for (i = 0; i < prog->in_nr; ++i) {
    180       int j = prog->in[i].id;
    181 
    182       prog->in[i].hw = nintp;
    183       for (c = 0; c < 4; ++c)
    184          if (prog->in[i].mask & (1 << c))
    185             info->in[j].slot[c] = nintp++;
    186    }
    187    /* (n == m) if m never increased, i.e. no flat inputs */
    188    nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
    189    nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
    190    nvary = nintp - nflat;
    191 
    192    prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
    193    prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
    194 
    195    /* put front/back colors right after HPOS */
    196    prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
    197    for (i = 0; i < 2; ++i)
    198       if (prog->vp.bfc[i] < 0xff)
    199          prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;
    200 
    201    /* FP outputs */
    202 
    203    if (info->prop.fp.numColourResults > 1)
    204       prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
    205 
    206    for (i = 0; i < info->numOutputs; ++i) {
    207       prog->out[i].id = i;
    208       prog->out[i].sn = info->out[i].sn;
    209       prog->out[i].si = info->out[i].si;
    210       prog->out[i].mask = info->out[i].mask;
    211 
    212       if (i == info->io.fragDepth || i == info->io.sampleMask)
    213          continue;
    214       prog->out[i].hw = info->out[i].si * 4;
    215 
    216       for (c = 0; c < 4; ++c)
    217          info->out[i].slot[c] = prog->out[i].hw + c;
    218 
    219       prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
    220    }
    221 
    222    if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
    223       info->out[info->io.sampleMask].slot[0] = prog->max_out++;
    224 
    225    if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
    226       info->out[info->io.fragDepth].slot[2] = prog->max_out++;
    227 
    228    if (!prog->max_out)
    229       prog->max_out = 4;
    230 
    231    return 0;
    232 }
    233 
    234 static int
    235 nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
    236 {
    237    switch (info->type) {
    238    case PIPE_SHADER_VERTEX:
    239       return nv50_vertprog_assign_slots(info);
    240    case PIPE_SHADER_GEOMETRY:
    241       return nv50_vertprog_assign_slots(info);
    242    case PIPE_SHADER_FRAGMENT:
    243       return nv50_fragprog_assign_slots(info);
    244    default:
    245       return -1;
    246    }
    247 }
    248 
    249 static struct nv50_stream_output_state *
    250 nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
    251                                   const struct pipe_stream_output_info *pso)
    252 {
    253    struct nv50_stream_output_state *so;
    254    unsigned b, i, c;
    255    unsigned base[4];
    256 
    257    so = MALLOC_STRUCT(nv50_stream_output_state);
    258    if (!so)
    259       return NULL;
    260    memset(so->map, 0xff, sizeof(so->map));
    261 
    262    for (b = 0; b < 4; ++b)
    263       so->num_attribs[b] = 0;
    264    for (i = 0; i < pso->num_outputs; ++i) {
    265       unsigned end =  pso->output[i].dst_offset + pso->output[i].num_components;
    266       b = pso->output[i].output_buffer;
    267       assert(b < 4);
    268       so->num_attribs[b] = MAX2(so->num_attribs[b], end);
    269    }
    270 
    271    so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
    272 
    273    so->stride[0] = pso->stride[0] * 4;
    274    base[0] = 0;
    275    for (b = 1; b < 4; ++b) {
    276       assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
    277       so->stride[b] = so->num_attribs[b] * 4;
    278       if (so->num_attribs[b])
    279          so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
    280       base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
    281    }
    282    if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
    283       assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
    284       so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
    285    }
    286 
    287    so->map_size = base[3] + so->num_attribs[3];
    288 
    289    for (i = 0; i < pso->num_outputs; ++i) {
    290       const unsigned s = pso->output[i].start_component;
    291       const unsigned p = pso->output[i].dst_offset;
    292       const unsigned r = pso->output[i].register_index;
    293       b = pso->output[i].output_buffer;
    294 
    295       for (c = 0; c < pso->output[i].num_components; ++c)
    296          so->map[base[b] + p + c] = info->out[r].slot[s + c];
    297    }
    298 
    299    return so;
    300 }
    301 
    302 boolean
    303 nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
    304 {
    305    struct nv50_ir_prog_info *info;
    306    int ret;
    307    const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
    308 
    309    info = CALLOC_STRUCT(nv50_ir_prog_info);
    310    if (!info)
    311       return FALSE;
    312 
    313    info->type = prog->type;
    314    info->target = chipset;
    315    info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
    316    info->bin.source = (void *)prog->pipe.tokens;
    317 
    318    info->io.ucpBinding = 15;
    319    info->io.ucpBase = 0;
    320    info->io.genUserClip = prog->vp.clpd_nr;
    321 
    322    info->assignSlots = nv50_program_assign_varying_slots;
    323 
    324    prog->vp.bfc[0] = 0xff;
    325    prog->vp.bfc[1] = 0xff;
    326    prog->vp.edgeflag = 0xff;
    327    prog->vp.clpd[0] = map_undef;
    328    prog->vp.clpd[1] = map_undef;
    329    prog->vp.psiz = map_undef;
    330    prog->gp.primid = 0x80;
    331 
    332    info->driverPriv = prog;
    333 
    334 #ifdef DEBUG
    335    info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
    336    info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
    337 #else
    338    info->optLevel = 3;
    339 #endif
    340 
    341    ret = nv50_ir_generate_code(info);
    342    if (ret) {
    343       NOUVEAU_ERR("shader translation failed: %i\n", ret);
    344       goto out;
    345    }
    346    if (info->bin.syms) /* we don't need them yet */
    347       FREE(info->bin.syms);
    348 
    349    prog->code = info->bin.code;
    350    prog->code_size = info->bin.codeSize;
    351    prog->fixups = info->bin.relocData;
    352    prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
    353    prog->tls_space = info->bin.tlsSpace;
    354 
    355    if (prog->type == PIPE_SHADER_FRAGMENT) {
    356       if (info->prop.fp.writesDepth) {
    357          prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
    358          prog->fp.flags[1] = 0x11;
    359       }
    360       if (info->prop.fp.usesDiscard)
    361          prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
    362    }
    363 
    364    if (prog->pipe.stream_output.num_outputs)
    365       prog->so = nv50_program_create_strmout_state(info,
    366                                                    &prog->pipe.stream_output);
    367 
    368 out:
    369    FREE(info);
    370    return !ret;
    371 }
    372 
    373 boolean
    374 nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
    375 {
    376    struct nouveau_heap *heap;
    377    int ret;
    378    uint32_t size = align(prog->code_size, 0x40);
    379 
    380    switch (prog->type) {
    381    case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
    382    case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
    383    case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
    384    default:
    385       assert(!"invalid program type");
    386       return FALSE;
    387    }
    388 
    389    ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
    390    if (ret) {
    391       /* Out of space: evict everything to compactify the code segment, hoping
    392        * the working set is much smaller and drifts slowly. Improve me !
    393        */
    394       while (heap->next) {
    395          struct nv50_program *evict = heap->next->priv;
    396          if (evict)
    397             nouveau_heap_free(&evict->mem);
    398       }
    399       debug_printf("WARNING: out of code space, evicting all shaders.\n");
    400    }
    401    prog->code_base = prog->mem->start;
    402 
    403    ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
    404    if (ret < 0)
    405       return FALSE;
    406    if (ret > 0)
    407       nv50->state.new_tls_space = TRUE;
    408 
    409    if (prog->fixups)
    410       nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
    411 
    412    nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
    413                        (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
    414                        NOUVEAU_BO_VRAM, prog->code_size, prog->code);
    415 
    416    BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
    417    PUSH_DATA (nv50->base.pushbuf, 0);
    418 
    419    return TRUE;
    420 }
    421 
    422 void
    423 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
    424 {
    425    const struct pipe_shader_state pipe = p->pipe;
    426    const ubyte type = p->type;
    427 
    428    if (p->mem)
    429       nouveau_heap_free(&p->mem);
    430 
    431    if (p->code)
    432       FREE(p->code);
    433 
    434    if (p->fixups)
    435       FREE(p->fixups);
    436 
    437    if (p->so)
    438       FREE(p->so);
    439 
    440    memset(p, 0, sizeof(*p));
    441 
    442    p->pipe = pipe;
    443    p->type = type;
    444 }
    445