Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2016 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * An implementation of the transform feedback driver hooks for Haswell
     26  * and later hardware.  This uses MI_MATH to compute the number of vertices
     27  * written (for use by DrawTransformFeedback()) without any CPU<->GPU
     28  * synchronization which could stall.
     29  */
     30 
     31 #include "brw_context.h"
     32 #include "brw_state.h"
     33 #include "brw_defines.h"
     34 #include "intel_batchbuffer.h"
     35 #include "intel_buffer_objects.h"
     36 #include "main/transformfeedback.h"
     37 
     38 /**
     39  * We store several values in obj->prim_count_bo:
     40  *
     41  * [4x 32-bit values]: Final Number of Vertices Written
     42  * [4x 32-bit values]: Tally of Primitives Written So Far
     43  * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots
     44  *
     45  * The first set of values is used by DrawTransformFeedback(), which
     46  * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs
     47  * an indirect draw.  The other values are just temporary storage.
     48  */
     49 
     50 #define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t))
     51 #define START_OFFSET (TALLY_OFFSET * 2)
     52 
     53 /**
     54  * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
     55  * to prim_count_bo.
     56  */
     57 static void
     58 save_prim_start_values(struct brw_context *brw,
     59                        struct brw_transform_feedback_object *obj)
     60 {
     61    /* Flush any drawing so that the counters have the right values. */
     62    brw_emit_mi_flush(brw);
     63 
     64    /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
     65    for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
     66       brw_store_register_mem64(brw, obj->prim_count_bo,
     67                                GEN7_SO_NUM_PRIMS_WRITTEN(i),
     68                                START_OFFSET + i * sizeof(uint64_t));
     69    }
     70 }
     71 
     72 /**
     73  * Compute the number of primitives written during our most recent
     74  * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value
     75  * minus the stashed "start" value), and add it to our running tally.
     76  *
     77  * If \p finalize is true, also compute the number of vertices written
     78  * (by multiplying by the number of vertices per primitive), and store
     79  * that to the "final" location.
     80  *
     81  * Otherwise, just overwrite the old tally with the new one.
     82  */
     83 static void
     84 tally_prims_written(struct brw_context *brw,
     85                     struct brw_transform_feedback_object *obj,
     86                     bool finalize)
     87 {
     88    /* Flush any drawing so that the counters have the right values. */
     89    brw_emit_mi_flush(brw);
     90 
     91    for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
     92       /* GPR0 = Tally */
     93       brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0);
     94       brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo,
     95                             I915_GEM_DOMAIN_INSTRUCTION,
     96                             I915_GEM_DOMAIN_INSTRUCTION,
     97                             TALLY_OFFSET + i * sizeof(uint32_t));
     98       if (!obj->base.Paused) {
     99          /* GPR1 = Start Snapshot */
    100          brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo,
    101                                  I915_GEM_DOMAIN_INSTRUCTION,
    102                                  I915_GEM_DOMAIN_INSTRUCTION,
    103                                  START_OFFSET + i * sizeof(uint64_t));
    104          /* GPR2 = Ending Snapshot */
    105          brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2));
    106 
    107          BEGIN_BATCH(9);
    108          OUT_BATCH(HSW_MI_MATH | (9 - 2));
    109          /* GPR1 = GPR2 (End) - GPR1 (Start) */
    110          OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
    111          OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
    112          OUT_BATCH(MI_MATH_ALU0(SUB));
    113          OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
    114          /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */
    115          OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
    116          OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
    117             OUT_BATCH(MI_MATH_ALU0(ADD));
    118          OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
    119          ADVANCE_BATCH();
    120       }
    121 
    122       if (!finalize) {
    123          /* Write back the new tally */
    124          brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
    125                                   TALLY_OFFSET + i * sizeof(uint32_t));
    126       } else {
    127          /* Convert the number of primitives to the number of vertices. */
    128          if (obj->primitive_mode == GL_LINES) {
    129             /* Double R0 (R0 = R0 + R0) */
    130             BEGIN_BATCH(5);
    131             OUT_BATCH(HSW_MI_MATH | (5 - 2));
    132             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
    133             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
    134             OUT_BATCH(MI_MATH_ALU0(ADD));
    135             OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
    136             ADVANCE_BATCH();
    137          } else if (obj->primitive_mode == GL_TRIANGLES) {
    138             /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */
    139             BEGIN_BATCH(9);
    140             OUT_BATCH(HSW_MI_MATH | (9 - 2));
    141             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
    142             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
    143             OUT_BATCH(MI_MATH_ALU0(ADD));
    144             OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
    145             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
    146             OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
    147             OUT_BATCH(MI_MATH_ALU0(ADD));
    148             OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
    149             ADVANCE_BATCH();
    150          }
    151          /* Store it to the final result */
    152          brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
    153                                   i * sizeof(uint32_t));
    154       }
    155    }
    156 }
    157 
    158 /**
    159  * BeginTransformFeedback() driver hook.
    160  */
    161 void
    162 hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
    163                               struct gl_transform_feedback_object *obj)
    164 {
    165    struct brw_context *brw = brw_context(ctx);
    166    struct brw_transform_feedback_object *brw_obj =
    167       (struct brw_transform_feedback_object *) obj;
    168 
    169    brw_obj->primitive_mode = mode;
    170 
    171    /* Reset the SO buffer offsets to 0. */
    172    if (brw->gen >= 8) {
    173       brw_obj->zero_offsets = true;
    174    } else {
    175       BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS);
    176       OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2));
    177       for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
    178          OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
    179          OUT_BATCH(0);
    180       }
    181       ADVANCE_BATCH();
    182    }
    183 
    184    /* Zero out the initial tallies */
    185    brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET,     0ull);
    186    brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull);
    187 
    188    /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
    189    save_prim_start_values(brw, brw_obj);
    190 }
    191 
    192 /**
    193  * PauseTransformFeedback() driver hook.
    194  */
    195 void
    196 hsw_pause_transform_feedback(struct gl_context *ctx,
    197                               struct gl_transform_feedback_object *obj)
    198 {
    199    struct brw_context *brw = brw_context(ctx);
    200    struct brw_transform_feedback_object *brw_obj =
    201       (struct brw_transform_feedback_object *) obj;
    202 
    203    if (brw->is_haswell) {
    204       /* Flush any drawing so that the counters have the right values. */
    205       brw_emit_mi_flush(brw);
    206 
    207       /* Save the SOL buffer offset register values. */
    208       for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
    209          BEGIN_BATCH(3);
    210          OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
    211          OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
    212          OUT_RELOC(brw_obj->offset_bo,
    213                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    214                    i * sizeof(uint32_t));
    215          ADVANCE_BATCH();
    216       }
    217    }
    218 
    219    /* Add any primitives written to our tally */
    220    tally_prims_written(brw, brw_obj, false);
    221 }
    222 
    223 /**
    224  * ResumeTransformFeedback() driver hook.
    225  */
    226 void
    227 hsw_resume_transform_feedback(struct gl_context *ctx,
    228                                struct gl_transform_feedback_object *obj)
    229 {
    230    struct brw_context *brw = brw_context(ctx);
    231    struct brw_transform_feedback_object *brw_obj =
    232       (struct brw_transform_feedback_object *) obj;
    233 
    234    if (brw->is_haswell) {
    235       /* Reload the SOL buffer offset registers. */
    236       for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
    237          BEGIN_BATCH(3);
    238          OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
    239          OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
    240          OUT_RELOC(brw_obj->offset_bo,
    241                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    242                    i * sizeof(uint32_t));
    243          ADVANCE_BATCH();
    244       }
    245    }
    246 
    247    /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
    248    save_prim_start_values(brw, brw_obj);
    249 }
    250 
    251 /**
    252  * EndTransformFeedback() driver hook.
    253  */
    254 void
    255 hsw_end_transform_feedback(struct gl_context *ctx,
    256 			    struct gl_transform_feedback_object *obj)
    257 {
    258    struct brw_context *brw = brw_context(ctx);
    259    struct brw_transform_feedback_object *brw_obj =
    260       (struct brw_transform_feedback_object *) obj;
    261 
    262    /* Add any primitives written to our tally, convert it from the number
    263     * of primitives written to the number of vertices written, and store
    264     * it in the "final" location in the buffer which DrawTransformFeedback()
    265     * will use as the vertex count.
    266     */
    267    tally_prims_written(brw, brw_obj, true);
    268 }
    269