/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"

struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

static void
clear_cache(struct intel_context *intel)
{
   struct cached_batch_item *item = intel->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   intel->batch.cached_items = NULL;
}

void
intel_batchbuffer_init(struct intel_context *intel)
{
   intel_batchbuffer_reset(intel);

   if (intel->gen >= 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "pipe_control workaround",
                                                      4096, 4096);
   }
}

void
intel_batchbuffer_reset(struct intel_context *intel)
{
   if (intel->batch.last_bo != NULL) {
      drm_intel_bo_unreference(intel->batch.last_bo);
      intel->batch.last_bo = NULL;
   }
   intel->batch.last_bo = intel->batch.bo;

   clear_cache(intel);

   intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                        intel->maxBatchSize, 4096);

   intel->batch.reserved_space = BATCH_RESERVED;
   intel->batch.state_batch_offset = intel->batch.bo->size;
   intel->batch.used = 0;
   intel->batch.needs_sol_reset = false;
}
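
/* intel_batchbuffer_save_state() and intel_batchbuffer_reset_to_saved() let
 * a caller speculatively emit commands and then roll them back before the
 * batch is flushed.  A hedged usage sketch; emit_draw_state() and
 * check_aperture() are hypothetical placeholders, not functions in this
 * driver:
 *
 *    intel_batchbuffer_save_state(intel);
 *    emit_draw_state(intel);
 *    if (!check_aperture(intel)) {
 *       intel_batchbuffer_reset_to_saved(intel);
 *       intel_batchbuffer_flush(intel);
 *    }
 */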
void
intel_batchbuffer_save_state(struct intel_context *intel)
{
   intel->batch.saved.used = intel->batch.used;
   intel->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(intel->batch.bo);
}

void
intel_batchbuffer_reset_to_saved(struct intel_context *intel)
{
   drm_intel_gem_bo_clear_relocs(intel->batch.bo, intel->batch.saved.reloc_count);

   intel->batch.used = intel->batch.saved.used;

   /* Cached batch state is dead, since we just cleared some unknown part
    * of the batchbuffer.  Assume that the caller resets any other state
    * necessary.
    */
   clear_cache(intel);
}

void
intel_batchbuffer_free(struct intel_context *intel)
{
   drm_intel_bo_unreference(intel->batch.last_bo);
   drm_intel_bo_unreference(intel->batch.bo);
   drm_intel_bo_unreference(intel->batch.workaround_bo);
   clear_cache(intel);
}

static void
do_batch_dump(struct intel_context *intel)
{
   struct drm_intel_decode *decode;
   struct intel_batchbuffer *batch = &intel->batch;
   int ret;

   decode = drm_intel_decode_context_alloc(intel->intelScreen->deviceID);
   if (!decode)
      return;

   ret = drm_intel_bo_map(batch->bo, false);
   if (ret == 0) {
      drm_intel_decode_set_batch_pointer(decode,
                                         batch->bo->virtual,
                                         batch->bo->offset,
                                         batch->used);
   } else {
      fprintf(stderr,
              "WARNING: failed to map batchbuffer (%s), "
              "dumping uploaded data instead.\n", strerror(-ret));

      drm_intel_decode_set_batch_pointer(decode,
                                         batch->map,
                                         batch->bo->offset,
                                         batch->used);
   }

   drm_intel_decode(decode);

   drm_intel_decode_context_free(decode);

   if (ret == 0) {
      drm_intel_bo_unmap(batch->bo);

      if (intel->vtbl.debug_batch != NULL)
         intel->vtbl.debug_batch(intel);
   }
}

/* TODO: Push this whole function into bufmgr.
 */
static int
do_flush_locked(struct intel_context *intel)
{
   struct intel_batchbuffer *batch = &intel->batch;
   int ret = 0;

   ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
   if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
      ret = drm_intel_bo_subdata(batch->bo,
                                 batch->state_batch_offset,
                                 batch->bo->size - batch->state_batch_offset,
                                 (char *)batch->map + batch->state_batch_offset);
   }

   if (!intel->intelScreen->no_hw) {
      int flags;

      if (intel->gen < 6 || !batch->is_blit) {
         flags = I915_EXEC_RENDER;
      } else {
         flags = I915_EXEC_BLT;
      }

      if (batch->needs_sol_reset)
         flags |= I915_EXEC_GEN7_SOL_RESET;

      if (ret == 0) {
         if (unlikely(INTEL_DEBUG & DEBUG_AUB) && intel->vtbl.annotate_aub)
            intel->vtbl.annotate_aub(intel);
         if (intel->hw_ctx == NULL || batch->is_blit) {
            ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
                                        flags);
         } else {
            ret = drm_intel_gem_bo_context_exec(batch->bo, intel->hw_ctx,
                                                4 * batch->used, flags);
         }
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      do_batch_dump(intel);

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   intel->vtbl.new_batch(intel);

   return ret;
}

int
_intel_batchbuffer_flush(struct intel_context *intel,
                         const char *file, int line)
{
   int ret;

   if (intel->batch.used == 0)
      return 0;

   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*intel->batch.used);

   intel->batch.reserved_space = 0;

   if (intel->vtbl.finish_batch)
      intel->vtbl.finish_batch(intel);

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);
   if (intel->batch.used & 1) {
      /* Round batchbuffer usage to an even number of DWORDs. */
      intel_batchbuffer_emit_dword(intel, MI_NOOP);
   }

   intel_upload_finish(intel);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!intel->no_batch_wrap);

   ret = do_flush_locked(intel);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(intel->batch.bo);
   }

   /* Reset the buffer. */
   intel_batchbuffer_reset(intel);

   return ret;
}


/* This is the only way buffers get added to the validate list.
 */
bool
intel_batchbuffer_emit_reloc(struct intel_context *intel,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /* Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

bool
intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /* Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

void
intel_batchbuffer_data(struct intel_context *intel,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(intel, bytes, is_blit);
   __memcpy(intel->batch.map + intel->batch.used, data, bytes);
   intel->batch.used += bytes >> 2;
}
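
/* De-duplicate identical state packets.  intel_batchbuffer_cached_advance()
 * compares the packet emitted since BEGIN_BATCH() recorded batch.emit
 * against a small list of previously emitted packets, keyed by the opcode in
 * the high 16 bits of the header dword; if an identical packet is already in
 * the current batch, the new copy is dropped by rewinding batch.used.  A
 * hedged usage sketch, assuming the CACHED_BATCH() wrapper in
 * intel_batchbuffer.h expands to this function (CMD_EXAMPLE_STATE is a
 * hypothetical packet, for illustration only):
 *
 *    BEGIN_BATCH(2);
 *    OUT_BATCH(CMD_EXAMPLE_STATE);
 *    OUT_BATCH(payload);
 *    CACHED_BATCH();    // instead of ADVANCE_BATCH()
 */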
void
intel_batchbuffer_cached_advance(struct intel_context *intel)
{
   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
   uint32_t *start = intel->batch.map + intel->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = intel->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &intel->batch.cached_items) {
               *prev = item->next;
               item->next = intel->batch.cached_items;
               intel->batch.cached_items = item;
            }
            intel->batch.used = intel->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = intel->batch.cached_items;
   intel->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = intel->batch.emit;
}
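
/* The workaround emitters below use the BEGIN_BATCH()/OUT_BATCH()/
 * OUT_RELOC()/ADVANCE_BATCH() macros from intel_batchbuffer.h: BEGIN_BATCH(n)
 * reserves space for n dwords (flushing the batch first if they would not
 * fit), OUT_BATCH() writes one dword, and OUT_RELOC() writes a dword that
 * the kernel patches with the buffer's final address.  The "(4 - 2)" in each
 * PIPE_CONTROL header is the usual command-length encoding: the DWord Length
 * field holds the total dword count of the command minus two.
 */
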
/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
intel_emit_depth_stall_flushes(struct intel_context *intel)
{
   assert(intel->gen >= 6 && intel->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}
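
/* Note: the PIPE_CONTROL write-immediate workarounds below (the gen7 VS
 * workaround and the gen6 post-sync non-zero flush) target workaround_bo,
 * the scratch page allocated in intel_batchbuffer_init().  Only the side
 * effect of the post-sync write matters; the value written is never read
 * back by the driver.
 */
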
/**
 * From the BSpec, volume 2a.03: VS Stage Input / State:
 * "[DevIVB] A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
 *  stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
 *  3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
 *  3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs
 *  to be sent before any combination of VS associated 3DSTATE."
 */
void
gen7_emit_vs_workaround_flush(struct intel_context *intel)
{
   assert(intel->gen == 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 *   [DevSNB-C+{W/A}] Before any depth stall flush (including those
 *   produced by non-pipelined state commands), software needs to first
 *   send a PIPE_CONTROL with no bits set except Post-Sync Operation != 0.
 *
 *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 *   =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 *   [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 *   BEFORE the pipe-control with a post-sync op and no write-cache
 *   flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
{
   if (!intel->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
{
   if (intel->gen >= 6) {
      if (intel->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             *   Flush Enable =1, a PIPE_CONTROL with any non-zero
             *   post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(intel);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE |
                   PIPE_CONTROL_CS_STALL);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else if (intel->gen >= 4) {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }
}
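
/* Hedged usage note: as the comment above intel_batchbuffer_emit_mi_flush()
 * says, callers typically invoke it when a buffer that was just rendered to
 * is about to be read (e.g. sampling from an FBO-drawn texture), for example:
 *
 *    if (bo_was_render_target)   // hypothetical condition, for illustration
 *       intel_batchbuffer_emit_mi_flush(intel);
 *
 * The gen-specific encodings above keep such call sites generation-agnostic.
 */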