1 /* 2 * Copyright 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 /** 25 * An implementation of the transform feedback driver hooks for Haswell 26 * and later hardware. This uses MI_MATH to compute the number of vertices 27 * written (for use by DrawTransformFeedback()) without any CPU<->GPU 28 * synchronization which could stall. 29 */ 30 31 #include "brw_context.h" 32 #include "brw_state.h" 33 #include "brw_defines.h" 34 #include "intel_batchbuffer.h" 35 #include "intel_buffer_objects.h" 36 #include "main/transformfeedback.h" 37 38 /** 39 * We store several values in obj->prim_count_bo: 40 * 41 * [4x 32-bit values]: Final Number of Vertices Written 42 * [4x 32-bit values]: Tally of Primitives Written So Far 43 * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots 44 * 45 * The first set of values is used by DrawTransformFeedback(), which 46 * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs 47 * an indirect draw. The other values are just temporary storage. 48 */ 49 50 #define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t)) 51 #define START_OFFSET (TALLY_OFFSET * 2) 52 53 /** 54 * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) 55 * to prim_count_bo. 56 */ 57 static void 58 save_prim_start_values(struct brw_context *brw, 59 struct brw_transform_feedback_object *obj) 60 { 61 /* Flush any drawing so that the counters have the right values. */ 62 brw_emit_mi_flush(brw); 63 64 /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ 65 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 66 brw_store_register_mem64(brw, obj->prim_count_bo, 67 GEN7_SO_NUM_PRIMS_WRITTEN(i), 68 START_OFFSET + i * sizeof(uint64_t)); 69 } 70 } 71 72 /** 73 * Compute the number of primitives written during our most recent 74 * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value 75 * minus the stashed "start" value), and add it to our running tally. 76 * 77 * If \p finalize is true, also compute the number of vertices written 78 * (by multiplying by the number of vertices per primitive), and store 79 * that to the "final" location. 80 * 81 * Otherwise, just overwrite the old tally with the new one. 82 */ 83 static void 84 tally_prims_written(struct brw_context *brw, 85 struct brw_transform_feedback_object *obj, 86 bool finalize) 87 { 88 /* Flush any drawing so that the counters have the right values. */ 89 brw_emit_mi_flush(brw); 90 91 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 92 /* GPR0 = Tally */ 93 brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); 94 brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo, 95 I915_GEM_DOMAIN_INSTRUCTION, 96 I915_GEM_DOMAIN_INSTRUCTION, 97 TALLY_OFFSET + i * sizeof(uint32_t)); 98 if (!obj->base.Paused) { 99 /* GPR1 = Start Snapshot */ 100 brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo, 101 I915_GEM_DOMAIN_INSTRUCTION, 102 I915_GEM_DOMAIN_INSTRUCTION, 103 START_OFFSET + i * sizeof(uint64_t)); 104 /* GPR2 = Ending Snapshot */ 105 brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2)); 106 107 BEGIN_BATCH(9); 108 OUT_BATCH(HSW_MI_MATH | (9 - 2)); 109 /* GPR1 = GPR2 (End) - GPR1 (Start) */ 110 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2)); 111 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); 112 OUT_BATCH(MI_MATH_ALU0(SUB)); 113 OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); 114 /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */ 115 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); 116 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); 117 OUT_BATCH(MI_MATH_ALU0(ADD)); 118 OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); 119 ADVANCE_BATCH(); 120 } 121 122 if (!finalize) { 123 /* Write back the new tally */ 124 brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), 125 TALLY_OFFSET + i * sizeof(uint32_t)); 126 } else { 127 /* Convert the number of primitives to the number of vertices. */ 128 if (obj->primitive_mode == GL_LINES) { 129 /* Double R0 (R0 = R0 + R0) */ 130 BEGIN_BATCH(5); 131 OUT_BATCH(HSW_MI_MATH | (5 - 2)); 132 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); 133 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); 134 OUT_BATCH(MI_MATH_ALU0(ADD)); 135 OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); 136 ADVANCE_BATCH(); 137 } else if (obj->primitive_mode == GL_TRIANGLES) { 138 /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */ 139 BEGIN_BATCH(9); 140 OUT_BATCH(HSW_MI_MATH | (9 - 2)); 141 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); 142 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); 143 OUT_BATCH(MI_MATH_ALU0(ADD)); 144 OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); 145 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); 146 OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); 147 OUT_BATCH(MI_MATH_ALU0(ADD)); 148 OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); 149 ADVANCE_BATCH(); 150 } 151 /* Store it to the final result */ 152 brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), 153 i * sizeof(uint32_t)); 154 } 155 } 156 } 157 158 /** 159 * BeginTransformFeedback() driver hook. 160 */ 161 void 162 hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, 163 struct gl_transform_feedback_object *obj) 164 { 165 struct brw_context *brw = brw_context(ctx); 166 struct brw_transform_feedback_object *brw_obj = 167 (struct brw_transform_feedback_object *) obj; 168 169 brw_obj->primitive_mode = mode; 170 171 /* Reset the SO buffer offsets to 0. */ 172 if (brw->gen >= 8) { 173 brw_obj->zero_offsets = true; 174 } else { 175 BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS); 176 OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2)); 177 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 178 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); 179 OUT_BATCH(0); 180 } 181 ADVANCE_BATCH(); 182 } 183 184 /* Zero out the initial tallies */ 185 brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET, 0ull); 186 brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull); 187 188 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ 189 save_prim_start_values(brw, brw_obj); 190 } 191 192 /** 193 * PauseTransformFeedback() driver hook. 194 */ 195 void 196 hsw_pause_transform_feedback(struct gl_context *ctx, 197 struct gl_transform_feedback_object *obj) 198 { 199 struct brw_context *brw = brw_context(ctx); 200 struct brw_transform_feedback_object *brw_obj = 201 (struct brw_transform_feedback_object *) obj; 202 203 if (brw->is_haswell) { 204 /* Flush any drawing so that the counters have the right values. */ 205 brw_emit_mi_flush(brw); 206 207 /* Save the SOL buffer offset register values. */ 208 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 209 BEGIN_BATCH(3); 210 OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); 211 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); 212 OUT_RELOC(brw_obj->offset_bo, 213 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 214 i * sizeof(uint32_t)); 215 ADVANCE_BATCH(); 216 } 217 } 218 219 /* Add any primitives written to our tally */ 220 tally_prims_written(brw, brw_obj, false); 221 } 222 223 /** 224 * ResumeTransformFeedback() driver hook. 225 */ 226 void 227 hsw_resume_transform_feedback(struct gl_context *ctx, 228 struct gl_transform_feedback_object *obj) 229 { 230 struct brw_context *brw = brw_context(ctx); 231 struct brw_transform_feedback_object *brw_obj = 232 (struct brw_transform_feedback_object *) obj; 233 234 if (brw->is_haswell) { 235 /* Reload the SOL buffer offset registers. */ 236 for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { 237 BEGIN_BATCH(3); 238 OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); 239 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); 240 OUT_RELOC(brw_obj->offset_bo, 241 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 242 i * sizeof(uint32_t)); 243 ADVANCE_BATCH(); 244 } 245 } 246 247 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ 248 save_prim_start_values(brw, brw_obj); 249 } 250 251 /** 252 * EndTransformFeedback() driver hook. 253 */ 254 void 255 hsw_end_transform_feedback(struct gl_context *ctx, 256 struct gl_transform_feedback_object *obj) 257 { 258 struct brw_context *brw = brw_context(ctx); 259 struct brw_transform_feedback_object *brw_obj = 260 (struct brw_transform_feedback_object *) obj; 261 262 /* Add any primitives written to our tally, convert it from the number 263 * of primitives written to the number of vertices written, and store 264 * it in the "final" location in the buffer which DrawTransformFeedback() 265 * will use as the vertex count. 266 */ 267 tally_prims_written(brw, brw_obj, true); 268 } 269