Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2011 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * @file gen7_sol_state.c
     26  *
     27  * Controls the stream output logic (SOL) stage of the gen7 hardware, which is
     28  * used to implement GL_EXT_transform_feedback.
     29  */
     30 
     31 #include "brw_context.h"
     32 #include "brw_state.h"
     33 #include "brw_defines.h"
     34 #include "intel_batchbuffer.h"
     35 #include "intel_buffer_objects.h"
     36 
     37 static void
     38 upload_3dstate_so_buffers(struct brw_context *brw)
     39 {
     40    struct intel_context *intel = &brw->intel;
     41    struct gl_context *ctx = &intel->ctx;
     42    /* BRW_NEW_VERTEX_PROGRAM */
     43    const struct gl_shader_program *vs_prog =
     44       ctx->Shader.CurrentVertexProgram;
     45    const struct gl_transform_feedback_info *linked_xfb_info =
     46       &vs_prog->LinkedTransformFeedback;
     47    /* _NEW_TRANSFORM_FEEDBACK */
     48    struct gl_transform_feedback_object *xfb_obj =
     49       ctx->TransformFeedback.CurrentObject;
     50    int i;
     51 
     52    /* Set up the up to 4 output buffers.  These are the ranges defined in the
     53     * gl_transform_feedback_object.
     54     */
     55    for (i = 0; i < 4; i++) {
     56       struct intel_buffer_object *bufferobj =
     57 	 intel_buffer_object(xfb_obj->Buffers[i]);
     58       drm_intel_bo *bo;
     59       uint32_t start, end;
     60       uint32_t stride;
     61 
     62       if (!xfb_obj->Buffers[i]) {
     63 	 /* The pitch of 0 in this command indicates that the buffer is
     64 	  * unbound and won't be written to.
     65 	  */
     66 	 BEGIN_BATCH(4);
     67 	 OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
     68 	 OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
     69 	 OUT_BATCH(0);
     70 	 OUT_BATCH(0);
     71 	 ADVANCE_BATCH();
     72 
     73 	 continue;
     74       }
     75 
     76       bo = intel_bufferobj_buffer(intel, bufferobj, INTEL_WRITE_PART);
     77       stride = linked_xfb_info->BufferStride[i] * 4;
     78 
     79       start = xfb_obj->Offset[i];
     80       assert(start % 4 == 0);
     81       end = ALIGN(start + xfb_obj->Size[i], 4);
     82       assert(end <= bo->size);
     83 
     84       /* Offset the starting offset by the current vertex index into the
     85        * feedback buffer, offset register is always set to 0 at the start of the
     86        * batchbuffer.
     87        */
     88       start += brw->sol.offset_0_batch_start * stride;
     89       assert(start <= end);
     90 
     91       BEGIN_BATCH(4);
     92       OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
     93       OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT) | stride);
     94       OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
     95       OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, end);
     96       ADVANCE_BATCH();
     97    }
     98 }
     99 
    100 /**
    101  * Outputs the 3DSTATE_SO_DECL_LIST command.
    102  *
    103  * The data output is a series of 64-bit entries containing a SO_DECL per
    104  * stream.  We only have one stream of rendering coming out of the GS unit, so
    105  * we only emit stream 0 (low 16 bits) SO_DECLs.
    106  */
    107 static void
    108 upload_3dstate_so_decl_list(struct brw_context *brw,
    109 			    struct brw_vue_map *vue_map)
    110 {
    111    struct intel_context *intel = &brw->intel;
    112    struct gl_context *ctx = &intel->ctx;
    113    /* BRW_NEW_VERTEX_PROGRAM */
    114    const struct gl_shader_program *vs_prog =
    115       ctx->Shader.CurrentVertexProgram;
    116    /* _NEW_TRANSFORM_FEEDBACK */
    117    const struct gl_transform_feedback_info *linked_xfb_info =
    118       &vs_prog->LinkedTransformFeedback;
    119    int i;
    120    uint16_t so_decl[128];
    121    int buffer_mask = 0;
    122    int next_offset[4] = {0, 0, 0, 0};
    123 
    124    STATIC_ASSERT(ARRAY_SIZE(so_decl) >= MAX_PROGRAM_OUTPUTS);
    125 
    126    /* Construct the list of SO_DECLs to be emitted.  The formatting of the
    127     * command is feels strange -- each dword pair contains a SO_DECL per stream.
    128     */
    129    for (i = 0; i < linked_xfb_info->NumOutputs; i++) {
    130       int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
    131       uint16_t decl = 0;
    132       int vert_result = linked_xfb_info->Outputs[i].OutputRegister;
    133       unsigned component_mask =
    134          (1 << linked_xfb_info->Outputs[i].NumComponents) - 1;
    135 
    136       /* gl_PointSize is stored in VERT_RESULT_PSIZ.w. */
    137       if (vert_result == VERT_RESULT_PSIZ) {
    138          assert(linked_xfb_info->Outputs[i].NumComponents == 1);
    139          component_mask <<= 3;
    140       } else {
    141          component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
    142       }
    143 
    144       buffer_mask |= 1 << buffer;
    145 
    146       decl |= buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
    147       decl |= vue_map->vert_result_to_slot[vert_result] <<
    148 	 SO_DECL_REGISTER_INDEX_SHIFT;
    149       decl |= component_mask << SO_DECL_COMPONENT_MASK_SHIFT;
    150 
    151       /* This assert should be true until GL_ARB_transform_feedback_instanced
    152        * is added and we start using the hole flag.
    153        */
    154       assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
    155 
    156       next_offset[buffer] += linked_xfb_info->Outputs[i].NumComponents;
    157 
    158       so_decl[i] = decl;
    159    }
    160 
    161    BEGIN_BATCH(linked_xfb_info->NumOutputs * 2 + 3);
    162    OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 |
    163 	     (linked_xfb_info->NumOutputs * 2 + 1));
    164 
    165    OUT_BATCH((buffer_mask << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) |
    166 	     (0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) |
    167 	     (0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) |
    168 	     (0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT));
    169 
    170    OUT_BATCH((linked_xfb_info->NumOutputs << SO_NUM_ENTRIES_0_SHIFT) |
    171 	     (0 << SO_NUM_ENTRIES_1_SHIFT) |
    172 	     (0 << SO_NUM_ENTRIES_2_SHIFT) |
    173 	     (0 << SO_NUM_ENTRIES_3_SHIFT));
    174 
    175    for (i = 0; i < linked_xfb_info->NumOutputs; i++) {
    176       OUT_BATCH(so_decl[i]);
    177       OUT_BATCH(0);
    178    }
    179 
    180    ADVANCE_BATCH();
    181 }
    182 
    183 static void
    184 upload_3dstate_streamout(struct brw_context *brw, bool active,
    185 			 struct brw_vue_map *vue_map)
    186 {
    187    struct intel_context *intel = &brw->intel;
    188    struct gl_context *ctx = &intel->ctx;
    189    /* _NEW_TRANSFORM_FEEDBACK */
    190    struct gl_transform_feedback_object *xfb_obj =
    191       ctx->TransformFeedback.CurrentObject;
    192    uint32_t dw1 = 0, dw2 = 0;
    193    int i;
    194 
    195    /* _NEW_RASTERIZER_DISCARD */
    196    if (ctx->RasterDiscard)
    197       dw1 |= SO_RENDERING_DISABLE;
    198 
    199    if (active) {
    200       int urb_entry_read_offset = 0;
    201       int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
    202 	 urb_entry_read_offset;
    203 
    204       dw1 |= SO_FUNCTION_ENABLE;
    205       dw1 |= SO_STATISTICS_ENABLE;
    206 
    207       /* _NEW_LIGHT */
    208       if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
    209 	 dw1 |= SO_REORDER_TRAILING;
    210 
    211       for (i = 0; i < 4; i++) {
    212 	 if (xfb_obj->Buffers[i]) {
    213 	    dw1 |= SO_BUFFER_ENABLE(i);
    214 	 }
    215       }
    216 
    217       /* We always read the whole vertex.  This could be reduced at some
    218        * point by reading less and offsetting the register index in the
    219        * SO_DECLs.
    220        */
    221       dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT;
    222       dw2 |= (urb_entry_read_length - 1) <<
    223 	 SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
    224    }
    225 
    226    BEGIN_BATCH(3);
    227    OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
    228    OUT_BATCH(dw1);
    229    OUT_BATCH(dw2);
    230    ADVANCE_BATCH();
    231 }
    232 
    233 static void
    234 upload_sol_state(struct brw_context *brw)
    235 {
    236    struct intel_context *intel = &brw->intel;
    237    struct gl_context *ctx = &intel->ctx;
    238    /* _NEW_TRANSFORM_FEEDBACK */
    239    struct gl_transform_feedback_object *xfb_obj =
    240       ctx->TransformFeedback.CurrentObject;
    241    bool active = xfb_obj->Active && !xfb_obj->Paused;
    242 
    243    if (active) {
    244       upload_3dstate_so_buffers(brw);
    245       /* CACHE_NEW_VS_PROG */
    246       upload_3dstate_so_decl_list(brw, &brw->vs.prog_data->vue_map);
    247 
    248       intel->batch.needs_sol_reset = true;
    249    }
    250 
    251    /* Finally, set up the SOL stage.  This command must always follow updates to
    252     * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
    253     * MMIO register updates (current performed by the kernel at each batch
    254     * emit).
    255     */
    256    upload_3dstate_streamout(brw, active, &brw->vs.prog_data->vue_map);
    257 }
    258 
    259 const struct brw_tracked_state gen7_sol_state = {
    260    .dirty = {
    261       .mesa  = (_NEW_RASTERIZER_DISCARD |
    262 		_NEW_LIGHT |
    263 		_NEW_TRANSFORM_FEEDBACK),
    264       .brw   = (BRW_NEW_BATCH |
    265 		BRW_NEW_VERTEX_PROGRAM),
    266       .cache = CACHE_NEW_VS_PROG,
    267    },
    268    .emit = upload_sol_state,
    269 };
    270 
    271 void
    272 gen7_end_transform_feedback(struct gl_context *ctx,
    273 			    struct gl_transform_feedback_object *obj)
    274 {
    275    /* Because we have to rely on the kernel to reset our SO write offsets, and
    276     * we only get to do it once per batchbuffer, flush the batch after feedback
    277     * so another transform feedback can get the write offset reset it needs.
    278     *
    279     * This also covers any cache flushing required.
    280     */
    281    struct brw_context *brw = brw_context(ctx);
    282    struct intel_context *intel = &brw->intel;
    283 
    284    intel_batchbuffer_flush(intel);
    285 }
    286