Home | History | Annotate | Download | only in swr
      1 /****************************************************************************
      2  * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  ***************************************************************************/
     23 
     24 #include "swr_screen.h"
     25 #include "swr_context.h"
     26 #include "swr_resource.h"
     27 #include "swr_fence.h"
     28 #include "swr_query.h"
     29 #include "jit_api.h"
     30 
     31 #include "util/u_draw.h"
     32 #include "util/u_prim.h"
     33 
     34 /*
     35  * Draw vertex arrays, with optional indexing, optional instancing.
     36  */
     37 static void
     38 swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
     39 {
     40    struct swr_context *ctx = swr_context(pipe);
     41 
     42    if (!info->count_from_stream_output && !info->indirect &&
     43        !info->primitive_restart &&
     44        !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
     45       return;
     46 
     47    if (!swr_check_render_cond(pipe))
     48       return;
     49 
     50    if (info->indirect) {
     51       util_draw_indirect(pipe, info);
     52       return;
     53    }
     54 
     55    /* If indexed draw, force vertex validation since index buffer comes
     56     * from draw info. */
     57    if (info->index_size)
     58       ctx->dirty |= SWR_NEW_VERTEX;
     59 
     60    /* Update derived state, pass draw info to update function. */
     61    swr_update_derived(pipe, info);
     62 
     63    swr_update_draw_context(ctx);
     64 
     65    if (ctx->vs->pipe.stream_output.num_outputs) {
     66       if (!ctx->vs->soFunc[info->mode]) {
     67          STREAMOUT_COMPILE_STATE state = {0};
     68          struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
     69 
     70          state.numVertsPerPrim = u_vertices_per_prim(info->mode);
     71 
     72          uint32_t offsets[MAX_SO_STREAMS] = {0};
     73          uint32_t num = 0;
     74 
     75          for (uint32_t i = 0; i < so->num_outputs; i++) {
     76             assert(so->output[i].stream == 0); // @todo
     77             uint32_t output_buffer = so->output[i].output_buffer;
     78             if (so->output[i].dst_offset != offsets[output_buffer]) {
     79                // hole - need to fill
     80                state.stream.decl[num].bufferIndex = output_buffer;
     81                state.stream.decl[num].hole = true;
     82                state.stream.decl[num].componentMask =
     83                   (1 << (so->output[i].dst_offset - offsets[output_buffer]))
     84                   - 1;
     85                num++;
     86                offsets[output_buffer] = so->output[i].dst_offset;
     87             }
     88 
     89             unsigned attrib_slot = so->output[i].register_index;
     90             attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
     91 
     92             state.stream.decl[num].bufferIndex = output_buffer;
     93             state.stream.decl[num].attribSlot = attrib_slot;
     94             state.stream.decl[num].componentMask =
     95                ((1 << so->output[i].num_components) - 1)
     96                << so->output[i].start_component;
     97             state.stream.decl[num].hole = false;
     98             num++;
     99 
    100             offsets[output_buffer] += so->output[i].num_components;
    101          }
    102 
    103          state.stream.numDecls = num;
    104 
    105          HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
    106          ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
    107          debug_printf("so shader    %p\n", ctx->vs->soFunc[info->mode]);
    108          assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
    109       }
    110 
    111       ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
    112    }
    113 
    114    struct swr_vertex_element_state *velems = ctx->velems;
    115    if (info->primitive_restart)
    116       velems->fsState.cutIndex = info->restart_index;
    117    else
    118       velems->fsState.cutIndex = 0;
    119    velems->fsState.bEnableCutIndex = info->primitive_restart;
    120    velems->fsState.bPartialVertexBuffer = (info->min_index > 0);
    121 
    122    swr_jit_fetch_key key;
    123    swr_generate_fetch_key(key, velems);
    124    auto search = velems->map.find(key);
    125    if (search != velems->map.end()) {
    126       velems->fsFunc = search->second;
    127    } else {
    128       HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
    129       velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
    130 
    131       debug_printf("fetch shader %p\n", velems->fsFunc);
    132       assert(velems->fsFunc && "Error: FetchShader = NULL");
    133 
    134       velems->map.insert(std::make_pair(key, velems->fsFunc));
    135    }
    136 
    137    ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
    138 
    139    /* Set up frontend state
    140     * XXX setup provokingVertex & topologyProvokingVertex */
    141    SWR_FRONTEND_STATE feState = {0};
    142 
    143    // feState.vsVertexSize seeds the PA size that is used as an interface
    144    // between all the shader stages, so it has to be large enough to
    145    // incorporate all interfaces between stages
    146 
    147    // max of gs and vs num_outputs
    148    feState.vsVertexSize = ctx->vs->info.base.num_outputs;
    149    if (ctx->gs &&
    150        ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
    151       feState.vsVertexSize = ctx->gs->info.base.num_outputs;
    152    }
    153 
    154    if (ctx->vs->info.base.num_outputs) {
    155       // gs does not adjust for position in SGV slot at input from vs
    156       if (!ctx->gs)
    157          feState.vsVertexSize--;
    158    }
    159 
    160    // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
    161    feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
    162 
    163    // The PA in the clipper does not handle BE vertex sizes
    164    // different from FE. Increase vertexsize only for the cases that needed it
    165 
    166    // primid needs a slot
    167    if (ctx->fs->info.base.uses_primid)
    168       feState.vsVertexSize++;
    169    // sprite coord enable
    170    if (ctx->rasterizer->sprite_coord_enable)
    171       feState.vsVertexSize++;
    172 
    173 
    174    if (ctx->rasterizer->flatshade_first) {
    175       feState.provokingVertex = {1, 0, 0};
    176    } else {
    177       feState.provokingVertex = {2, 1, 2};
    178    }
    179 
    180    enum pipe_prim_type topology;
    181    if (ctx->gs)
    182       topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
    183    else
    184       topology = info->mode;
    185 
    186    switch (topology) {
    187    case PIPE_PRIM_TRIANGLE_FAN:
    188       feState.topologyProvokingVertex = feState.provokingVertex.triFan;
    189       break;
    190    case PIPE_PRIM_TRIANGLE_STRIP:
    191    case PIPE_PRIM_TRIANGLES:
    192       feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
    193       break;
    194    case PIPE_PRIM_QUAD_STRIP:
    195    case PIPE_PRIM_QUADS:
    196       if (ctx->rasterizer->flatshade_first)
    197          feState.topologyProvokingVertex = 0;
    198       else
    199          feState.topologyProvokingVertex = 3;
    200       break;
    201    case PIPE_PRIM_LINES:
    202    case PIPE_PRIM_LINE_LOOP:
    203    case PIPE_PRIM_LINE_STRIP:
    204       feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
    205       break;
    206    default:
    207       feState.topologyProvokingVertex = 0;
    208    }
    209 
    210    feState.bEnableCutIndex = info->primitive_restart;
    211    ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);
    212 
    213    if (info->index_size)
    214       ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
    215                                           swr_convert_prim_topology(info->mode),
    216                                           info->count,
    217                                           info->instance_count,
    218                                           info->start,
    219                                           info->index_bias,
    220                                           info->start_instance);
    221    else
    222       ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
    223                                    swr_convert_prim_topology(info->mode),
    224                                    info->count,
    225                                    info->instance_count,
    226                                    info->start,
    227                                    info->start_instance);
    228 
    229    /* On large client-buffer draw, we used client buffer directly, without
    230     * copy.  Block until draw is finished.
    231     * VMD is an example application that benefits from this. */
    232    if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
    233       struct swr_screen *screen = swr_screen(pipe->screen);
    234       swr_fence_submit(ctx, screen->flush_fence);
    235       swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
    236    }
    237 }
    238 
    239 
    240 static void
    241 swr_flush(struct pipe_context *pipe,
    242           struct pipe_fence_handle **fence,
    243           unsigned flags)
    244 {
    245    struct swr_context *ctx = swr_context(pipe);
    246    struct swr_screen *screen = swr_screen(pipe->screen);
    247 
    248    for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {
    249       struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
    250       if (cb) {
    251          swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
    252       }
    253    }
    254    if (ctx->framebuffer.zsbuf) {
    255       swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,
    256                                SWR_TILE_RESOLVED);
    257    }
    258 
    259    if (fence)
    260       swr_fence_reference(pipe->screen, fence, screen->flush_fence);
    261 }
    262 
    263 void
    264 swr_finish(struct pipe_context *pipe)
    265 {
    266    struct pipe_fence_handle *fence = nullptr;
    267 
    268    swr_flush(pipe, &fence, 0);
    269    swr_fence_finish(pipe->screen, NULL, fence, 0);
    270    swr_fence_reference(pipe->screen, &fence, NULL);
    271 }
    272 
    273 /*
    274  * Invalidate tiles so they can be reloaded back when needed
    275  */
    276 void
    277 swr_invalidate_render_target(struct pipe_context *pipe,
    278                              uint32_t attachment,
    279                              uint16_t width, uint16_t height)
    280 {
    281    struct swr_context *ctx = swr_context(pipe);
    282 
    283    /* grab the rect from the passed in arguments */
    284    swr_update_draw_context(ctx);
    285    SWR_RECT full_rect =
    286       {0, 0, (int32_t)width, (int32_t)height};
    287    ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
    288                                   1 << attachment,
    289                                   full_rect);
    290 }
    291 
    292 
    293 /*
    294  * Store SWR HotTiles back to renderTarget surface.
    295  */
    296 void
    297 swr_store_render_target(struct pipe_context *pipe,
    298                         uint32_t attachment,
    299                         enum SWR_TILE_STATE post_tile_state)
    300 {
    301    struct swr_context *ctx = swr_context(pipe);
    302    struct swr_draw_context *pDC = &ctx->swrDC;
    303    struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
    304 
    305    /* Only proceed if there's a valid surface to store to */
    306    if (renderTarget->xpBaseAddress) {
    307       swr_update_draw_context(ctx);
    308       SWR_RECT full_rect =
    309          {0, 0,
    310           (int32_t)u_minify(renderTarget->width, renderTarget->lod),
    311           (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
    312       ctx->api.pfnSwrStoreTiles(ctx->swrContext,
    313                                 1 << attachment,
    314                                 post_tile_state,
    315                                 full_rect);
    316    }
    317 }
    318 
    319 void
    320 swr_store_dirty_resource(struct pipe_context *pipe,
    321                          struct pipe_resource *resource,
    322                          enum SWR_TILE_STATE post_tile_state)
    323 {
    324    /* Only store resource if it has been written to */
    325    if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
    326       struct swr_context *ctx = swr_context(pipe);
    327       struct swr_screen *screen = swr_screen(pipe->screen);
    328       struct swr_resource *spr = swr_resource(resource);
    329 
    330       swr_draw_context *pDC = &ctx->swrDC;
    331       SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
    332       for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
    333          if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
    334              (spr->secondary.xpBaseAddress &&
    335               renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
    336             swr_store_render_target(pipe, i, post_tile_state);
    337 
    338             /* Mesa thinks depth/stencil are fused, so we'll never get an
    339              * explicit resource for stencil.  So, if checking depth, then
    340              * also check for stencil. */
    341             if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
    342                swr_store_render_target(
    343                   pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
    344             }
    345 
    346             /* This fence signals StoreTiles completion */
    347             swr_fence_submit(ctx, screen->flush_fence);
    348 
    349             break;
    350          }
    351    }
    352 }
    353 
    354 void
    355 swr_draw_init(struct pipe_context *pipe)
    356 {
    357    pipe->draw_vbo = swr_draw_vbo;
    358    pipe->flush = swr_flush;
    359 }
    360