Home | History | Annotate | Download | only in i965
      1 /*
 * Copyright © 2011 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 /** \file gen6_sol.c
     25  *
     26  * Code to initialize the binding table entries used by transform feedback.
     27  */
     28 
     29 #include "main/bufferobj.h"
     30 #include "main/macros.h"
     31 #include "brw_context.h"
     32 #include "intel_batchbuffer.h"
     33 #include "brw_defines.h"
     34 #include "brw_state.h"
     35 #include "main/transformfeedback.h"
     36 
     37 static void
     38 gen6_update_sol_surfaces(struct brw_context *brw)
     39 {
     40    struct gl_context *ctx = &brw->ctx;
     41    bool xfb_active = _mesa_is_xfb_active_and_unpaused(ctx);
     42    struct gl_transform_feedback_object *xfb_obj;
     43    const struct gl_transform_feedback_info *linked_xfb_info = NULL;
     44 
     45    if (xfb_active) {
     46       /* BRW_NEW_TRANSFORM_FEEDBACK */
     47       xfb_obj = ctx->TransformFeedback.CurrentObject;
     48       linked_xfb_info = xfb_obj->program->sh.LinkedTransformFeedback;
     49    }
     50 
     51    for (int i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) {
     52       const int surf_index = BRW_GEN6_SOL_BINDING_START + i;
     53       if (xfb_active && i < linked_xfb_info->NumOutputs) {
     54          unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer;
     55          unsigned buffer_offset =
     56             xfb_obj->Offset[buffer] / 4 +
     57             linked_xfb_info->Outputs[i].DstOffset;
     58          if (brw->programs[MESA_SHADER_GEOMETRY]) {
     59             brw_update_sol_surface(
     60                brw, xfb_obj->Buffers[buffer],
     61                &brw->gs.base.surf_offset[surf_index],
     62                linked_xfb_info->Outputs[i].NumComponents,
     63                linked_xfb_info->Buffers[buffer].Stride, buffer_offset);
     64          } else {
     65             brw_update_sol_surface(
     66                brw, xfb_obj->Buffers[buffer],
     67                &brw->ff_gs.surf_offset[surf_index],
     68                linked_xfb_info->Outputs[i].NumComponents,
     69                linked_xfb_info->Buffers[buffer].Stride, buffer_offset);
     70          }
     71       } else {
     72          if (!brw->programs[MESA_SHADER_GEOMETRY])
     73             brw->ff_gs.surf_offset[surf_index] = 0;
     74          else
     75             brw->gs.base.surf_offset[surf_index] = 0;
     76       }
     77    }
     78 
     79    brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
     80 }
     81 
     82 const struct brw_tracked_state gen6_sol_surface = {
     83    .dirty = {
     84       .mesa = 0,
     85       .brw = BRW_NEW_BATCH |
     86              BRW_NEW_BLORP |
     87              BRW_NEW_TRANSFORM_FEEDBACK,
     88    },
     89    .emit = gen6_update_sol_surfaces,
     90 };
     91 
     92 /**
     93  * Constructs the binding table for the WM surface state, which maps unit
     94  * numbers to surface state objects.
     95  */
     96 static void
     97 brw_gs_upload_binding_table(struct brw_context *brw)
     98 {
     99    uint32_t *bind;
    100    struct gl_context *ctx = &brw->ctx;
    101    const struct gl_program *prog;
    102    bool need_binding_table = false;
    103 
    104    /* We have two scenarios here:
    105     * 1) We are using a geometry shader only to implement transform feedback
    106     *    for a vertex shader (brw->programs[MESA_SHADER_GEOMETRY] == NULL).
    107     *    In this case, we only need surfaces for transform feedback in the
    108     *    GS stage.
    109     * 2) We have a user-provided geometry shader. In this case we may need
    110     *    surfaces for transform feedback and/or other stuff, like textures,
    111     *    in the GS stage.
    112     */
    113 
    114    if (!brw->programs[MESA_SHADER_GEOMETRY]) {
    115       /* BRW_NEW_VERTEX_PROGRAM */
    116       prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
    117       if (prog) {
    118          /* Skip making a binding table if we don't have anything to put in it */
    119          const struct gl_transform_feedback_info *linked_xfb_info =
    120             prog->sh.LinkedTransformFeedback;
    121          need_binding_table = linked_xfb_info->NumOutputs > 0;
    122       }
    123       if (!need_binding_table) {
    124          if (brw->ff_gs.bind_bo_offset != 0) {
    125             brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
    126             brw->ff_gs.bind_bo_offset = 0;
    127          }
    128          return;
    129       }
    130 
    131       /* Might want to calculate nr_surfaces first, to avoid taking up so much
    132        * space for the binding table. Anyway, in this case we know that we only
    133        * use BRW_MAX_SOL_BINDINGS surfaces at most.
    134        */
    135       bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS,
    136                              32, &brw->ff_gs.bind_bo_offset);
    137 
    138       /* BRW_NEW_SURFACES */
    139       memcpy(bind, brw->ff_gs.surf_offset,
    140              BRW_MAX_SOL_BINDINGS * sizeof(uint32_t));
    141    } else {
    142       /* BRW_NEW_GEOMETRY_PROGRAM */
    143       prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
    144       if (prog) {
    145          /* Skip making a binding table if we don't have anything to put in it */
    146          struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
    147          const struct gl_transform_feedback_info *linked_xfb_info =
    148             prog->sh.LinkedTransformFeedback;
    149          need_binding_table = linked_xfb_info->NumOutputs > 0 ||
    150                               prog_data->binding_table.size_bytes > 0;
    151       }
    152       if (!need_binding_table) {
    153          if (brw->gs.base.bind_bo_offset != 0) {
    154             brw->gs.base.bind_bo_offset = 0;
    155             brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
    156          }
    157          return;
    158       }
    159 
    160       /* Might want to calculate nr_surfaces first, to avoid taking up so much
    161        * space for the binding table.
    162        */
    163       bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SURFACES,
    164                              32, &brw->gs.base.bind_bo_offset);
    165 
    166       /* BRW_NEW_SURFACES */
    167       memcpy(bind, brw->gs.base.surf_offset,
    168              BRW_MAX_SURFACES * sizeof(uint32_t));
    169    }
    170 
    171    brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
    172 }
    173 
    174 const struct brw_tracked_state gen6_gs_binding_table = {
    175    .dirty = {
    176       .mesa = 0,
    177       .brw = BRW_NEW_BATCH |
    178              BRW_NEW_BLORP |
    179              BRW_NEW_GEOMETRY_PROGRAM |
    180              BRW_NEW_VERTEX_PROGRAM |
    181              BRW_NEW_SURFACES,
    182    },
    183    .emit = brw_gs_upload_binding_table,
    184 };
    185 
    186 struct gl_transform_feedback_object *
    187 brw_new_transform_feedback(struct gl_context *ctx, GLuint name)
    188 {
    189    struct brw_context *brw = brw_context(ctx);
    190    struct brw_transform_feedback_object *brw_obj =
    191       CALLOC_STRUCT(brw_transform_feedback_object);
    192    if (!brw_obj)
    193       return NULL;
    194 
    195    _mesa_init_transform_feedback_object(&brw_obj->base, name);
    196 
    197    brw_obj->offset_bo =
    198       brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16, 64);
    199    brw_obj->prim_count_bo =
    200       brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384, 64);
    201 
    202    return &brw_obj->base;
    203 }
    204 
    205 void
    206 brw_delete_transform_feedback(struct gl_context *ctx,
    207                               struct gl_transform_feedback_object *obj)
    208 {
    209    struct brw_transform_feedback_object *brw_obj =
    210       (struct brw_transform_feedback_object *) obj;
    211 
    212    for (unsigned i = 0; i < ARRAY_SIZE(obj->Buffers); i++) {
    213       _mesa_reference_buffer_object(ctx, &obj->Buffers[i], NULL);
    214    }
    215 
    216    brw_bo_unreference(brw_obj->offset_bo);
    217    brw_bo_unreference(brw_obj->prim_count_bo);
    218 
    219    free(brw_obj);
    220 }
    221 
    222 /**
    223  * Tally the number of primitives generated so far.
    224  *
    225  * The buffer contains a series of pairs:
    226  * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
    227  * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
    228  *
    229  * For each stream, we subtract the pair of values (end - start) to get the
    230  * number of primitives generated during one section.  We accumulate these
    231  * values, adding them up to get the total number of primitives generated.
    232  *
    233  * Note that we expose one stream pre-Gen7, so the above is just (start, end).
    234  */
    235 static void
    236 aggregate_transform_feedback_counter(
    237    struct brw_context *brw,
    238    struct brw_bo *bo,
    239    struct brw_transform_feedback_counter *counter)
    240 {
    241    const unsigned streams = brw->ctx.Const.MaxVertexStreams;
    242 
    243    /* If the current batch is still contributing to the number of primitives
    244     * generated, flush it now so the results will be present when mapped.
    245     */
    246    if (brw_batch_references(&brw->batch, bo))
    247       intel_batchbuffer_flush(brw);
    248 
    249    if (unlikely(brw->perf_debug && brw_bo_busy(bo)))
    250       perf_debug("Stalling for # of transform feedback primitives written.\n");
    251 
    252    uint64_t *prim_counts = brw_bo_map(brw, bo, MAP_READ);
    253    prim_counts += counter->bo_start * streams;
    254 
    255    for (unsigned i = counter->bo_start; i + 1 < counter->bo_end; i += 2) {
    256       for (unsigned s = 0; s < streams; s++)
    257          counter->accum[s] += prim_counts[streams + s] - prim_counts[s];
    258 
    259       prim_counts += 2 * streams;
    260    }
    261 
    262    brw_bo_unmap(bo);
    263 
    264    /* We've already gathered up the old data; we can safely overwrite it now. */
    265    counter->bo_start = counter->bo_end = 0;
    266 }
    267 
    268 /**
    269  * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
    270  * to prim_count_bo.
    271  *
    272  * If prim_count_bo is out of space, gather up the results so far into
    273  * prims_generated[] and allocate a new buffer with enough space.
    274  *
    275  * The number of primitives written is used to compute the number of vertices
    276  * written to a transform feedback stream, which is required to implement
    277  * DrawTransformFeedback().
    278  */
    279 void
    280 brw_save_primitives_written_counters(struct brw_context *brw,
    281                                      struct brw_transform_feedback_object *obj)
    282 {
    283    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    284    const struct gl_context *ctx = &brw->ctx;
    285    const int streams = ctx->Const.MaxVertexStreams;
    286 
    287    assert(obj->prim_count_bo != NULL);
    288 
    289    /* Check if there's enough space for a new pair of four values. */
    290    if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >=
    291        obj->prim_count_bo->size) {
    292       aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
    293                                            &obj->previous_counter);
    294       aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
    295                                            &obj->counter);
    296    }
    297 
    298    /* Flush any drawing so that the counters have the right values. */
    299    brw_emit_mi_flush(brw);
    300 
    301    /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
    302    if (devinfo->gen >= 7) {
    303       for (int i = 0; i < streams; i++) {
    304          int offset = (streams * obj->counter.bo_end + i) * sizeof(uint64_t);
    305          brw_store_register_mem64(brw, obj->prim_count_bo,
    306                                   GEN7_SO_NUM_PRIMS_WRITTEN(i),
    307                                   offset);
    308       }
    309    } else {
    310       brw_store_register_mem64(brw, obj->prim_count_bo,
    311                                GEN6_SO_NUM_PRIMS_WRITTEN,
    312                                obj->counter.bo_end * sizeof(uint64_t));
    313    }
    314 
    315    /* Update where to write data to. */
    316    obj->counter.bo_end++;
    317 }
    318 
    319 static void
    320 compute_vertices_written_so_far(struct brw_context *brw,
    321                                 struct brw_transform_feedback_object *obj,
    322                                 struct brw_transform_feedback_counter *counter,
    323                                 uint64_t *vertices_written)
    324 {
    325    const struct gl_context *ctx = &brw->ctx;
    326    unsigned vertices_per_prim = 0;
    327 
    328    switch (obj->primitive_mode) {
    329    case GL_POINTS:
    330       vertices_per_prim = 1;
    331       break;
    332    case GL_LINES:
    333       vertices_per_prim = 2;
    334       break;
    335    case GL_TRIANGLES:
    336       vertices_per_prim = 3;
    337       break;
    338    default:
    339       unreachable("Invalid transform feedback primitive mode.");
    340    }
    341 
    342    /* Get the number of primitives generated. */
    343    aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter);
    344 
    345    for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) {
    346       vertices_written[i] = vertices_per_prim * counter->accum[i];
    347    }
    348 }
    349 
    350 /**
    351  * Compute the number of vertices written by the last transform feedback
    352  * begin/end block.
    353  */
    354 static void
    355 compute_xfb_vertices_written(struct brw_context *brw,
    356                              struct brw_transform_feedback_object *obj)
    357 {
    358    if (obj->vertices_written_valid || !obj->base.EndedAnytime)
    359       return;
    360 
    361    compute_vertices_written_so_far(brw, obj, &obj->previous_counter,
    362                                    obj->vertices_written);
    363    obj->vertices_written_valid = true;
    364 }
    365 
    366 /**
    367  * GetTransformFeedbackVertexCount() driver hook.
    368  *
    369  * Returns the number of vertices written to a particular stream by the last
    370  * Begin/EndTransformFeedback block.  Used to implement DrawTransformFeedback().
    371  */
    372 GLsizei
    373 brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
    374                                         struct gl_transform_feedback_object *obj,
    375                                         GLuint stream)
    376 {
    377    struct brw_context *brw = brw_context(ctx);
    378    struct brw_transform_feedback_object *brw_obj =
    379       (struct brw_transform_feedback_object *) obj;
    380 
    381    assert(obj->EndedAnytime);
    382    assert(stream < ctx->Const.MaxVertexStreams);
    383 
    384    compute_xfb_vertices_written(brw, brw_obj);
    385    return brw_obj->vertices_written[stream];
    386 }
    387 
    388 void
    389 brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
    390 			     struct gl_transform_feedback_object *obj)
    391 {
    392    struct brw_context *brw = brw_context(ctx);
    393    const struct gl_program *prog;
    394    const struct gl_transform_feedback_info *linked_xfb_info;
    395    struct gl_transform_feedback_object *xfb_obj =
    396       ctx->TransformFeedback.CurrentObject;
    397    struct brw_transform_feedback_object *brw_obj =
    398       (struct brw_transform_feedback_object *) xfb_obj;
    399 
    400    assert(brw->screen->devinfo.gen == 6);
    401 
    402    if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) {
    403       /* BRW_NEW_GEOMETRY_PROGRAM */
    404       prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
    405    } else {
    406       /* BRW_NEW_VERTEX_PROGRAM */
    407       prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
    408    }
    409    linked_xfb_info = prog->sh.LinkedTransformFeedback;
    410 
    411    /* Compute the maximum number of vertices that we can write without
    412     * overflowing any of the buffers currently being used for feedback.
    413     */
    414    brw_obj->max_index
    415       = _mesa_compute_max_transform_feedback_vertices(ctx, xfb_obj,
    416                                                       linked_xfb_info);
    417 
    418    /* Initialize the SVBI 0 register to zero and set the maximum index. */
    419    BEGIN_BATCH(4);
    420    OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
    421    OUT_BATCH(0); /* SVBI 0 */
    422    OUT_BATCH(0); /* starting index */
    423    OUT_BATCH(brw_obj->max_index);
    424    ADVANCE_BATCH();
    425 
    426    /* Initialize the rest of the unused streams to sane values.  Otherwise,
    427     * they may indicate that there is no room to write data and prevent
    428     * anything from happening at all.
    429     */
    430    for (int i = 1; i < 4; i++) {
    431       BEGIN_BATCH(4);
    432       OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
    433       OUT_BATCH(i << SVB_INDEX_SHIFT);
    434       OUT_BATCH(0); /* starting index */
    435       OUT_BATCH(0xffffffff);
    436       ADVANCE_BATCH();
    437    }
    438 
    439    /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
    440    brw_save_primitives_written_counters(brw, brw_obj);
    441 
    442    brw_obj->primitive_mode = mode;
    443 }
    444 
    445 void
    446 brw_end_transform_feedback(struct gl_context *ctx,
    447                            struct gl_transform_feedback_object *obj)
    448 {
    449    struct brw_context *brw = brw_context(ctx);
    450    struct brw_transform_feedback_object *brw_obj =
    451       (struct brw_transform_feedback_object *) obj;
    452 
    453    /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */
    454    if (!obj->Paused)
    455       brw_save_primitives_written_counters(brw, brw_obj);
    456 
    457    /* We've reached the end of a transform feedback begin/end block.  This
    458     * means that future DrawTransformFeedback() calls will need to pick up the
    459     * results of the current counter, and that it's time to roll back the
    460     * current primitive counter to zero.
    461     */
    462    brw_obj->previous_counter = brw_obj->counter;
    463    brw_reset_transform_feedback_counter(&brw_obj->counter);
    464 
    465    /* EndTransformFeedback() means that we need to update the number of
    466     * vertices written.  Since it's only necessary if DrawTransformFeedback()
    467     * is called and it means mapping a buffer object, we delay computing it
    468     * until it's absolutely necessary to try and avoid stalls.
    469     */
    470    brw_obj->vertices_written_valid = false;
    471 }
    472 
    473 void
    474 brw_pause_transform_feedback(struct gl_context *ctx,
    475                              struct gl_transform_feedback_object *obj)
    476 {
    477    struct brw_context *brw = brw_context(ctx);
    478    struct brw_transform_feedback_object *brw_obj =
    479       (struct brw_transform_feedback_object *) obj;
    480 
    481    /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
    482     * While this operation is paused, other transform feedback actions may
    483     * occur, which will contribute to the counters.  We need to exclude that
    484     * from our counts.
    485     */
    486    brw_save_primitives_written_counters(brw, brw_obj);
    487 }
    488 
    489 void
    490 brw_resume_transform_feedback(struct gl_context *ctx,
    491                               struct gl_transform_feedback_object *obj)
    492 {
    493    struct brw_context *brw = brw_context(ctx);
    494    struct brw_transform_feedback_object *brw_obj =
    495       (struct brw_transform_feedback_object *) obj;
    496 
    497    /* Reload SVBI 0 with the count of vertices written so far. */
    498    uint64_t svbi;
    499    compute_vertices_written_so_far(brw, brw_obj, &brw_obj->counter, &svbi);
    500 
    501    BEGIN_BATCH(4);
    502    OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
    503    OUT_BATCH(0); /* SVBI 0 */
    504    OUT_BATCH((uint32_t) svbi); /* starting index */
    505    OUT_BATCH(brw_obj->max_index);
    506    ADVANCE_BATCH();
    507 
    508    /* Initialize the rest of the unused streams to sane values.  Otherwise,
    509     * they may indicate that there is no room to write data and prevent
    510     * anything from happening at all.
    511     */
    512    for (int i = 1; i < 4; i++) {
    513       BEGIN_BATCH(4);
    514       OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
    515       OUT_BATCH(i << SVB_INDEX_SHIFT);
    516       OUT_BATCH(0); /* starting index */
    517       OUT_BATCH(0xffffffff);
    518       ADVANCE_BATCH();
    519    }
    520 
    521    /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
    522    brw_save_primitives_written_counters(brw, brw_obj);
    523 }
    524