1 /* 2 * Copyright 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 /** 25 * An implementation of the transform feedback driver hooks for Haswell 26 * and later hardware. This uses MI_MATH to compute the number of vertices 27 * written (for use by DrawTransformFeedback()) without any CPU<->GPU 28 * synchronization which could stall. 29 */ 30 31 #include "brw_context.h" 32 #include "brw_state.h" 33 #include "brw_defines.h" 34 #include "intel_batchbuffer.h" 35 #include "intel_buffer_objects.h" 36 #include "main/transformfeedback.h" 37 38 /** 39 * We store several values in obj->prim_count_bo: 40 * 41 * [4x 32-bit values]: Final Number of Vertices Written 42 * [4x 32-bit values]: Tally of Primitives Written So Far 43 * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots 44 * 45 * The first set of values is used by DrawTransformFeedback(), which 46 * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs 47 * an indirect draw. The other values are just temporary storage. 48 */ 49 50 #define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t)) 51 #define START_OFFSET (TALLY_OFFSET * 2) 52 53 /** 54 * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) 55 * to prim_count_bo. 56 */ 57 static void 58 save_prim_start_values(struct brw_context *brw, 59 struct brw_transform_feedback_object *obj) 60 { 61 /* Flush any drawing so that the counters have the right values. */ 62 brw_emit_mi_flush(brw); 63 64 /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ 65 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 66 brw_store_register_mem64(brw, obj->prim_count_bo, 67 GEN7_SO_NUM_PRIMS_WRITTEN(i), 68 START_OFFSET + i * sizeof(uint64_t)); 69 } 70 } 71 72 /** 73 * Compute the number of primitives written during our most recent 74 * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value 75 * minus the stashed "start" value), and add it to our running tally. 76 * 77 * If \p finalize is true, also compute the number of vertices written 78 * (by multiplying by the number of vertices per primitive), and store 79 * that to the "final" location. 80 * 81 * Otherwise, just overwrite the old tally with the new one. 82 */ 83 static void 84 tally_prims_written(struct brw_context *brw, 85 struct brw_transform_feedback_object *obj, 86 bool finalize) 87 { 88 /* Flush any drawing so that the counters have the right values. */ 89 brw_emit_mi_flush(brw); 90 91 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 92 /* GPR0 = Tally */ 93 brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); 94 brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo, 95 TALLY_OFFSET + i * sizeof(uint32_t)); 96 if (!obj->base.Paused) { 97 /* GPR1 = Start Snapshot */ 98 brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo, 99 START_OFFSET + i * sizeof(uint64_t)); 100 /* GPR2 = Ending Snapshot */ 101 brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2)); 102 103 BEGIN_BATCH(9); 104 OUT_BATCH(HSW_MI_MATH | (9 - 2)); 105 /* GPR1 = GPR2 (End) - GPR1 (Start) */ 106 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2)); 107 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); 108 OUT_BATCH(MI_MATH_ALU0(SUB)); 109 OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); 110 /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */ 111 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); 112 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); 113 OUT_BATCH(MI_MATH_ALU0(ADD)); 114 OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); 115 ADVANCE_BATCH(); 116 } 117 118 if (!finalize) { 119 /* Write back the new tally */ 120 brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), 121 TALLY_OFFSET + i * sizeof(uint32_t)); 122 } else { 123 /* Convert the number of primitives to the number of vertices. */ 124 if (obj->primitive_mode == GL_LINES) { 125 /* Double R0 (R0 = R0 + R0) */ 126 BEGIN_BATCH(5); 127 OUT_BATCH(HSW_MI_MATH | (5 - 2)); 128 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); 129 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); 130 OUT_BATCH(MI_MATH_ALU0(ADD)); 131 OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); 132 ADVANCE_BATCH(); 133 } else if (obj->primitive_mode == GL_TRIANGLES) { 134 /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */ 135 BEGIN_BATCH(9); 136 OUT_BATCH(HSW_MI_MATH | (9 - 2)); 137 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); 138 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); 139 OUT_BATCH(MI_MATH_ALU0(ADD)); 140 OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); 141 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); 142 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); 143 OUT_BATCH(MI_MATH_ALU0(ADD)); 144 OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); 145 ADVANCE_BATCH(); 146 } 147 /* Store it to the final result */ 148 brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), 149 i * sizeof(uint32_t)); 150 } 151 } 152 } 153 154 /** 155 * BeginTransformFeedback() driver hook. 156 */ 157 void 158 hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, 159 struct gl_transform_feedback_object *obj) 160 { 161 struct brw_context *brw = brw_context(ctx); 162 struct brw_transform_feedback_object *brw_obj = 163 (struct brw_transform_feedback_object *) obj; 164 const struct gen_device_info *devinfo = &brw->screen->devinfo; 165 166 brw_obj->primitive_mode = mode; 167 168 /* Reset the SO buffer offsets to 0. */ 169 if (devinfo->gen >= 8) { 170 brw_obj->zero_offsets = true; 171 } else { 172 BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS); 173 OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2)); 174 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 175 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); 176 OUT_BATCH(0); 177 } 178 ADVANCE_BATCH(); 179 } 180 181 /* Zero out the initial tallies */ 182 brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET, 0ull); 183 brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull); 184 185 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ 186 save_prim_start_values(brw, brw_obj); 187 } 188 189 /** 190 * PauseTransformFeedback() driver hook. 191 */ 192 void 193 hsw_pause_transform_feedback(struct gl_context *ctx, 194 struct gl_transform_feedback_object *obj) 195 { 196 struct brw_context *brw = brw_context(ctx); 197 struct brw_transform_feedback_object *brw_obj = 198 (struct brw_transform_feedback_object *) obj; 199 const struct gen_device_info *devinfo = &brw->screen->devinfo; 200 201 if (devinfo->is_haswell) { 202 /* Flush any drawing so that the counters have the right values. */ 203 brw_emit_mi_flush(brw); 204 205 /* Save the SOL buffer offset register values. */ 206 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 207 BEGIN_BATCH(3); 208 OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); 209 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); 210 OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); 211 ADVANCE_BATCH(); 212 } 213 } 214 215 /* Add any primitives written to our tally */ 216 tally_prims_written(brw, brw_obj, false); 217 } 218 219 /** 220 * ResumeTransformFeedback() driver hook. 221 */ 222 void 223 hsw_resume_transform_feedback(struct gl_context *ctx, 224 struct gl_transform_feedback_object *obj) 225 { 226 struct brw_context *brw = brw_context(ctx); 227 struct brw_transform_feedback_object *brw_obj = 228 (struct brw_transform_feedback_object *) obj; 229 const struct gen_device_info *devinfo = &brw->screen->devinfo; 230 231 if (devinfo->is_haswell) { 232 /* Reload the SOL buffer offset registers. */ 233 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 234 BEGIN_BATCH(3); 235 OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); 236 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); 237 OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); 238 ADVANCE_BATCH(); 239 } 240 } 241 242 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ 243 save_prim_start_values(brw, brw_obj); 244 } 245 246 /** 247 * EndTransformFeedback() driver hook. 248 */ 249 void 250 hsw_end_transform_feedback(struct gl_context *ctx, 251 struct gl_transform_feedback_object *obj) 252 { 253 struct brw_context *brw = brw_context(ctx); 254 struct brw_transform_feedback_object *brw_obj = 255 (struct brw_transform_feedback_object *) obj; 256 257 /* Add any primitives written to our tally, convert it from the number 258 * of primitives written to the number of vertices written, and store 259 * it in the "final" location in the buffer which DrawTransformFeedback() 260 * will use as the vertex count. 261 */ 262 tally_prims_written(brw, brw_obj, true); 263 } 264