/*
 * Copyright 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file gen7_sol_state.c
 *
 * Controls the stream output logic (SOL) stage of the gen7 hardware, which is
 * used to implement GL_EXT_transform_feedback.
 */

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "main/transformfeedback.h"

/**
 * Emits one 3DSTATE_SO_BUFFER packet for each of the four buffer slots of
 * the current transform feedback object.
 *
 * A slot with no buffer bound gets a packet whose remaining dwords are zero
 * (a pitch of 0 marks the buffer as unbound).  A bound slot gets the stride
 * taken from the linked program's transform feedback info plus relocations
 * to the start and the end of the bound range.
 *
 * @param brw  driver context; the current transform feedback object is read
 *             from brw->ctx.
 */
static void
upload_3dstate_so_buffers(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;
   const struct gl_transform_feedback_info *linked_xfb_info =
      xfb_obj->program->sh.LinkedTransformFeedback;
   int i;

   /* Set up the up to 4 output buffers.  These are the ranges defined in the
    * gl_transform_feedback_object.
    */
   for (i = 0; i < 4; i++) {
      struct intel_buffer_object *bufferobj =
         intel_buffer_object(xfb_obj->Buffers[i]);
      drm_intel_bo *bo;
      uint32_t start, end;
      uint32_t stride;

      if (!xfb_obj->Buffers[i]) {
         /* The pitch of 0 in this command indicates that the buffer is
          * unbound and won't be written to.
          */
         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
         OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();

         continue;
      }

      /* NOTE(review): the "* 4" presumably converts a stride counted in
       * dwords into bytes -- confirm against the definition of Stride.
       */
      stride = linked_xfb_info->Buffers[i].Stride * 4;

      /* The range start must already be dword aligned; the end is rounded up
       * to the next multiple of 4 and must still lie inside the BO.
       */
      start = xfb_obj->Offset[i];
      assert(start % 4 == 0);
      end = ALIGN(start + xfb_obj->Size[i], 4);
      bo = intel_bufferobj_buffer(brw, bufferobj, start, end - start);
      assert(end <= bo->size);

      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
      OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT) | stride);
      OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
      OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, end);
      ADVANCE_BATCH();
   }
}

/**
 * Outputs the 3DSTATE_SO_DECL_LIST command.
 *
 * The data output is a series of 64-bit entries, each holding one 16-bit
 * SO_DECL for every one of the four streams (stream 0 occupies the low 16
 * bits of the first dword).
 */
void
gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
                                 const struct brw_vue_map *vue_map)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;
   const struct gl_transform_feedback_info *linked_xfb_info =
      xfb_obj->program->sh.LinkedTransformFeedback;
   /* Per-stream list of SO_DECLs, and how many entries each list holds. */
   uint16_t so_decl[MAX_VERTEX_STREAMS][128];
   /* Per-stream bitmask of the buffers that the stream writes to. */
   int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   /* NOTE(review): next_offset[] is indexed by output buffer below, although
    * it is sized with MAX_VERTEX_STREAMS -- confirm both limits are equal.
    */
   int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
   int max_decls = 0;
   STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);

   /* Streams with fewer entries than max_decls are emitted as zeros. */
   memset(so_decl, 0, sizeof(so_decl));

   /* Construct the list of SO_DECLs to be emitted.  The formatting of the
    * command feels strange -- each dword pair contains a SO_DECL per stream.
    */
   for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
      int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
      uint16_t decl = 0;
      int varying = linked_xfb_info->Outputs[i].OutputRegister;
      const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
      unsigned component_mask = (1 << components) - 1;
      unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
      unsigned decl_buffer_slot = buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
      assert(stream_id < MAX_VERTEX_STREAMS);

      /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      if (varying == VARYING_SLOT_PSIZ) {
         assert(components == 1);
         component_mask <<= 3;
      } else if (varying == VARYING_SLOT_LAYER) {
         assert(components == 1);
         component_mask <<= 1;
      } else if (varying == VARYING_SLOT_VIEWPORT) {
         assert(components == 1);
         component_mask <<= 2;
      } else {
         component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
      }

      buffer_mask[stream_id] |= 1 << buffer;

      decl |= decl_buffer_slot;
      /* Layer and viewport index share the PSIZ slot (see above), so their
       * register index is looked up through VARYING_SLOT_PSIZ.
       */
      if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) {
         decl |= vue_map->varying_to_slot[VARYING_SLOT_PSIZ] <<
            SO_DECL_REGISTER_INDEX_SHIFT;
      } else {
         assert(vue_map->varying_to_slot[varying] >= 0);
         decl |= vue_map->varying_to_slot[varying] <<
            SO_DECL_REGISTER_INDEX_SHIFT;
      }
      decl |= component_mask << SO_DECL_COMPONENT_MASK_SHIFT;

      /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
       * array.  Instead, it simply increments DstOffset for the following
       * input by the number of components that should be skipped.
       *
       * Our hardware is unusual in that it requires us to program SO_DECLs
       * for fake "hole" components, rather than simply taking the offset
       * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
       * program as many size = 4 holes as we can, then a final hole to
       * accommodate the final 1, 2, or 3 remaining.
       */
      int skip_components =
         linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];

      next_offset[buffer] += skip_components;

      while (skip_components >= 4) {
         so_decl[stream_id][decls[stream_id]++] =
            SO_DECL_HOLE_FLAG | 0xf | decl_buffer_slot;
         skip_components -= 4;
      }
      if (skip_components > 0)
         so_decl[stream_id][decls[stream_id]++] =
            SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1) |
            decl_buffer_slot;

      assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);

      next_offset[buffer] += components;

      so_decl[stream_id][decls[stream_id]++] = decl;

      /* The packet carries as many entries as the longest stream needs. */
      if (decls[stream_id] > max_decls)
         max_decls = decls[stream_id];
   }

   /* Header dword, buffer-select dword, entry-count dword, then two dwords
    * per entry.
    */
   BEGIN_BATCH(max_decls * 2 + 3);
   OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 | (max_decls * 2 + 1));

   OUT_BATCH((buffer_mask[0] << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) |
             (buffer_mask[1] << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) |
             (buffer_mask[2] << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) |
             (buffer_mask[3] << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT));

   OUT_BATCH((decls[0] << SO_NUM_ENTRIES_0_SHIFT) |
             (decls[1] << SO_NUM_ENTRIES_1_SHIFT) |
             (decls[2] << SO_NUM_ENTRIES_2_SHIFT) |
             (decls[3] << SO_NUM_ENTRIES_3_SHIFT));

   for (int i = 0; i < max_decls; i++) {
      /* Stream 1 | Stream 0 */
      OUT_BATCH(((uint32_t) so_decl[1][i]) << 16 | so_decl[0][i]);
      /* Stream 3 | Stream 2 */
      OUT_BATCH(((uint32_t) so_decl[3][i]) << 16 | so_decl[2][i]);
   }

   ADVANCE_BATCH();
}

/**
 * Returns true when \p q is non-NULL and its Active flag is set.
 */
static bool
query_active(struct gl_query_object *q)
{
   return q && q->Active;
}

/**
 * Emits the 3DSTATE_STREAMOUT packet.
 *
 * When \p active is false every payload dword is emitted as zero.  The
 * packet is 5 dwords long on gen8+ and 3 dwords long otherwise; the two
 * extra dwords carry the buffer pitches.
 *
 * @param brw      driver context.
 * @param active   whether stream output should be enabled.
 * @param vue_map  VUE map whose slot count determines the URB read length.
 */
static void
upload_3dstate_streamout(struct brw_context *brw, bool active,
                         const struct brw_vue_map *vue_map)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;
   uint32_t dw1 = 0, dw2 = 0, dw3 = 0, dw4 = 0;
   int i;

   if (active) {
      const struct gl_transform_feedback_info *linked_xfb_info =
         xfb_obj->program->sh.LinkedTransformFeedback;
      int urb_entry_read_offset = 0;
      /* Slots are counted in pairs, rounding up. */
      int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
         urb_entry_read_offset;

      dw1 |= SO_FUNCTION_ENABLE;
      dw1 |= SO_STATISTICS_ENABLE;

      /* BRW_NEW_RASTERIZER_DISCARD */
      if (ctx->RasterDiscard) {
         /* Rendering is only disabled here when no GL_PRIMITIVES_GENERATED
          * query is active on stream 0.
          */
         if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
            dw1 |= SO_RENDERING_DISABLE;
         } else {
            perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
                       "query active relies on the clipper.");
         }
      }

      /* _NEW_LIGHT */
      if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
         dw1 |= SO_REORDER_TRAILING;

      /* Before gen8 the per-buffer enables live in this packet. */
      if (brw->gen < 8) {
         for (i = 0; i < 4; i++) {
            if (xfb_obj->Buffers[i]) {
               dw1 |= SO_BUFFER_ENABLE(i);
            }
         }
      }

      /* We always read the whole vertex.  This could be reduced at some
       * point by reading less and offsetting the register index in the
       * SO_DECLs.
       */
      dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_0_VERTEX_READ_OFFSET);
      dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_0_VERTEX_READ_LENGTH);

      dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_1_VERTEX_READ_OFFSET);
      dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_1_VERTEX_READ_LENGTH);

      dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_2_VERTEX_READ_OFFSET);
      dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_2_VERTEX_READ_LENGTH);

      dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_3_VERTEX_READ_OFFSET);
      dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_3_VERTEX_READ_LENGTH);

      if (brw->gen >= 8) {
         /* Set buffer pitches; 0 means unbound. */
         if (xfb_obj->Buffers[0])
            dw3 |= linked_xfb_info->Buffers[0].Stride * 4;
         if (xfb_obj->Buffers[1])
            dw3 |= (linked_xfb_info->Buffers[1].Stride * 4) << 16;
         if (xfb_obj->Buffers[2])
            dw4 |= linked_xfb_info->Buffers[2].Stride * 4;
         if (xfb_obj->Buffers[3])
            dw4 |= (linked_xfb_info->Buffers[3].Stride * 4) << 16;
      }
   }

   const int dwords = brw->gen >= 8 ? 5 : 3;

   BEGIN_BATCH(dwords);
   OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (dwords - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(dw2);
   if (dwords > 3) {
      OUT_BATCH(dw3);
      OUT_BATCH(dw4);
   }
   ADVANCE_BATCH();
}

/**
 * Emit callback of the gen7_sol_state atom.
 *
 * When transform feedback is active and unpaused, uploads the SO buffers
 * (through the gen8 variant on gen8+) and the SO_DECL list; 3DSTATE_STREAMOUT
 * is then emitted in every case.
 */
static void
upload_sol_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   bool active = _mesa_is_xfb_active_and_unpaused(ctx);

   if (active) {
      if (brw->gen >= 8)
         gen8_upload_3dstate_so_buffers(brw);
      else
         upload_3dstate_so_buffers(brw);

      /* BRW_NEW_VUE_MAP_GEOM_OUT */
      gen7_upload_3dstate_so_decl_list(brw, &brw->vue_map_geom_out);
   }

   /* Finally, set up the SOL stage.  This command must always follow updates to
    * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
    * MMIO register updates (currently performed by the kernel at each batch
    * emit).
    */
   upload_3dstate_streamout(brw, active, &brw->vue_map_geom_out);
}

/* State atom: lists the dirty bits that trigger upload_sol_state(). */
const struct brw_tracked_state gen7_sol_state = {
   .dirty = {
      .mesa  = _NEW_LIGHT,
      .brw   = BRW_NEW_BATCH |
               BRW_NEW_BLORP |
               BRW_NEW_RASTERIZER_DISCARD |
               BRW_NEW_VUE_MAP_GEOM_OUT |
               BRW_NEW_TRANSFORM_FEEDBACK,
   },
   .emit = upload_sol_state,
};

/**
 * Tally the number of primitives generated so far.
347 * 348 * The buffer contains a series of pairs: 349 * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; 350 * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; 351 * 352 * For each stream, we subtract the pair of values (end - start) to get the 353 * number of primitives generated during one section. We accumulate these 354 * values, adding them up to get the total number of primitives generated. 355 */ 356 static void 357 gen7_tally_prims_generated(struct brw_context *brw, 358 struct brw_transform_feedback_object *obj) 359 { 360 /* If the current batch is still contributing to the number of primitives 361 * generated, flush it now so the results will be present when mapped. 362 */ 363 if (drm_intel_bo_references(brw->batch.bo, obj->prim_count_bo)) 364 intel_batchbuffer_flush(brw); 365 366 if (unlikely(brw->perf_debug && drm_intel_bo_busy(obj->prim_count_bo))) 367 perf_debug("Stalling for # of transform feedback primitives written.\n"); 368 369 drm_intel_bo_map(obj->prim_count_bo, false); 370 uint64_t *prim_counts = obj->prim_count_bo->virtual; 371 372 assert(obj->prim_count_buffer_index % (2 * BRW_MAX_XFB_STREAMS) == 0); 373 int pairs = obj->prim_count_buffer_index / (2 * BRW_MAX_XFB_STREAMS); 374 375 for (int i = 0; i < pairs; i++) { 376 for (int s = 0; s < BRW_MAX_XFB_STREAMS; s++) { 377 obj->prims_generated[s] += 378 prim_counts[BRW_MAX_XFB_STREAMS + s] - prim_counts[s]; 379 } 380 prim_counts += 2 * BRW_MAX_XFB_STREAMS; /* move to the next pair */ 381 } 382 383 drm_intel_bo_unmap(obj->prim_count_bo); 384 385 /* We've already gathered up the old data; we can safely overwrite it now. */ 386 obj->prim_count_buffer_index = 0; 387 } 388 389 /** 390 * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) 391 * to prim_count_bo. 392 * 393 * If prim_count_bo is out of space, gather up the results so far into 394 * prims_generated[] and allocate a new buffer with enough space. 
 *
 * The number of primitives written is used to compute the number of vertices
 * written to a transform feedback stream, which is required to implement
 * DrawTransformFeedback().
 */
static void
gen7_save_primitives_written_counters(struct brw_context *brw,
                                      struct brw_transform_feedback_object *obj)
{
   const int streams = BRW_MAX_XFB_STREAMS;

   /* Check if there's enough space for a new pair of four values.
    *
    * NOTE(review): 4096 is presumably the size of prim_count_bo in bytes --
    * confirm against the place where the buffer is allocated.
    */
   if (obj->prim_count_bo != NULL &&
       obj->prim_count_buffer_index + 2 * streams >= 4096 / sizeof(uint64_t)) {
      /* Gather up the results so far.  The tally rewinds
       * prim_count_buffer_index to 0, so the same buffer is reused.
       */
      gen7_tally_prims_generated(brw, obj);
   }

   /* Flush any drawing so that the counters have the right values. */
   brw_emit_mi_flush(brw);

   /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
   for (int i = 0; i < streams; i++) {
      int offset = (obj->prim_count_buffer_index + i) * sizeof(uint64_t);
      brw_store_register_mem64(brw, obj->prim_count_bo,
                               GEN7_SO_NUM_PRIMS_WRITTEN(i),
                               offset);
   }

   /* Update where to write data to. */
   obj->prim_count_buffer_index += streams;
}

/**
 * Compute the number of vertices written by this transform feedback operation.
 *
 * Does nothing when the cached value is still valid or when the object has
 * never been ended.  Otherwise tallies the primitive counters and multiplies
 * them by the number of vertices per primitive of the recorded mode.
 */
static void
brw_compute_xfb_vertices_written(struct brw_context *brw,
                                 struct brw_transform_feedback_object *obj)
{
   if (obj->vertices_written_valid || !obj->base.EndedAnytime)
      return;

   unsigned vertices_per_prim = 0;

   switch (obj->primitive_mode) {
   case GL_POINTS:
      vertices_per_prim = 1;
      break;
   case GL_LINES:
      vertices_per_prim = 2;
      break;
   case GL_TRIANGLES:
      vertices_per_prim = 3;
      break;
   default:
      unreachable("Invalid transform feedback primitive mode.");
   }

   /* Get the number of primitives generated. */
   gen7_tally_prims_generated(brw, obj);

   for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
      obj->vertices_written[i] = vertices_per_prim * obj->prims_generated[i];
   }
   obj->vertices_written_valid = true;
}

/**
 * GetTransformFeedbackVertexCount() driver hook.
 *
 * Returns the number of vertices written to a particular stream by the last
 * Begin/EndTransformFeedback block.  Used to implement DrawTransformFeedback().
 *
 * @param ctx     GL context.
 * @param obj     transform feedback object; must have been ended at least
 *                once (asserted).
 * @param stream  stream index; must be below BRW_MAX_XFB_STREAMS (asserted).
 */
GLsizei
brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
                                        struct gl_transform_feedback_object *obj,
                                        GLuint stream)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;

   assert(obj->EndedAnytime);
   assert(stream < BRW_MAX_XFB_STREAMS);

   /* Computed lazily; this is a no-op when the cached value is valid. */
   brw_compute_xfb_vertices_written(brw, brw_obj);
   return brw_obj->vertices_written[stream];
}

/**
 * Starts a transform feedback section on \p obj with primitive mode \p mode.
 *
 * Requests a reset of the SO buffer offsets, finalizes the vertex count of
 * the previous section, clears the primitive tallies and records the
 * starting values of the SO_NUM_PRIMS_WRITTEN counters.
 */
void
gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
                              struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;

   /* Reset the SO buffer offsets to 0. */
   if (brw->gen >= 8) {
      brw_obj->zero_offsets = true;
   } else {
      intel_batchbuffer_flush(brw);
      brw->batch.needs_sol_reset = true;
   }

   /* We're about to lose the information needed to compute the number of
    * vertices written during the last Begin/EndTransformFeedback section,
    * so we can't delay it any further.
    */
   brw_compute_xfb_vertices_written(brw, brw_obj);

   /* No primitives have been generated yet. */
   for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
      brw_obj->prims_generated[i] = 0;
   }

   /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
   gen7_save_primitives_written_counters(brw, brw_obj);

   /* Recorded last: the computation above still needs the previous mode. */
   brw_obj->primitive_mode = mode;
}

/**
 * Ends the transform feedback section on \p obj.
 *
 * Records the ending counter values (unless the object is paused) and marks
 * the cached vertex count as stale.
 */
void
gen7_end_transform_feedback(struct gl_context *ctx,
                            struct gl_transform_feedback_object *obj)
{
   /* After EndTransformFeedback, it's likely that the client program will try
    * to draw using the contents of the transform feedback buffer as vertex
    * input.  In order for this to work, we need to flush the data through at
    * least the GS stage of the pipeline, and flush out the render cache.  For
    * simplicity, just do a full flush.
    *
    * NOTE(review): no flush is issued directly in this function; the only
    * one visible from here is the brw_emit_mi_flush() inside
    * gen7_save_primitives_written_counters(), which is skipped when the
    * object is paused -- confirm that this is intended.
    */
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;

   /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */
   if (!obj->Paused)
      gen7_save_primitives_written_counters(brw, brw_obj);

   /* EndTransformFeedback() means that we need to update the number of
    * vertices written.  Since it's only necessary if DrawTransformFeedback()
    * is called and it means mapping a buffer object, we delay computing it
    * until it's absolutely necessary to try and avoid stalls.
    */
   brw_obj->vertices_written_valid = false;
}

/**
 * Pauses transform feedback on \p obj.
 *
 * Before gen8, stores the four SO write offset registers into offset_bo, one
 * uint32_t per buffer.  On every generation, records the current values of
 * the SO_NUM_PRIMS_WRITTEN counters.
 */
void
gen7_pause_transform_feedback(struct gl_context *ctx,
                              struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;

   /* Flush any drawing so that the counters have the right values. */
   brw_emit_mi_flush(brw);

   /* Save the SOL buffer offset register values. */
   if (brw->gen < 8) {
      for (int i = 0; i < 4; i++) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
         OUT_RELOC(brw_obj->offset_bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   i * sizeof(uint32_t));
         ADVANCE_BATCH();
      }
   }

   /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
    * While this operation is paused, other transform feedback actions may
    * occur, which will contribute to the counters.  We need to exclude that
    * from our counts.
    */
   gen7_save_primitives_written_counters(brw, brw_obj);
}

/**
 * Resumes transform feedback on \p obj.
 *
 * Before gen8, reloads the four SO write offset registers from offset_bo.
 * On every generation, records the new starting values of the
 * SO_NUM_PRIMS_WRITTEN counters.
 */
void
gen7_resume_transform_feedback(struct gl_context *ctx,
                               struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;

   /* Reload the SOL buffer offset registers. */
   if (brw->gen < 8) {
      for (int i = 0; i < 4; i++) {
         BEGIN_BATCH(3);
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
         OUT_RELOC(brw_obj->offset_bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   i * sizeof(uint32_t));
         ADVANCE_BATCH();
      }
   }

   /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
   gen7_save_primitives_written_counters(brw, brw_obj);
}