/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"

#include <xf86drm.h>
#include <i915_drm.h>

static void
intel_batchbuffer_reset(struct intel_batchbuffer *batch, dri_bufmgr *bufmgr,
                        bool has_llc);

void
intel_batchbuffer_init(struct intel_batchbuffer *batch, dri_bufmgr *bufmgr,
                       bool has_llc)
{
   intel_batchbuffer_reset(batch, bufmgr, has_llc);

   if (!has_llc) {
      batch->cpu_map = malloc(BATCH_SZ);
      batch->map = batch->cpu_map;
      batch->map_next = batch->cpu_map;
   }
}

static void
intel_batchbuffer_reset(struct intel_batchbuffer *batch, dri_bufmgr *bufmgr,
                        bool has_llc)
{
   if (batch->last_bo != NULL) {
      drm_intel_bo_unreference(batch->last_bo);
      batch->last_bo = NULL;
   }
   batch->last_bo = batch->bo;

   batch->bo = drm_intel_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096);
   if (has_llc) {
      drm_intel_bo_map(batch->bo, true);
      batch->map = batch->bo->virtual;
   }
   batch->map_next = batch->map;

   batch->reserved_space = BATCH_RESERVED;
   batch->state_batch_offset = batch->bo->size;
   batch->needs_sol_reset = false;
   batch->state_base_address_emitted = false;

   /* We don't know what ring the new batch will be sent to until we see the
    * first BEGIN_BATCH or BEGIN_BATCH_BLT. Mark it as unknown.
    */
   batch->ring = UNKNOWN_RING;
}

static void
intel_batchbuffer_reset_and_clear_render_cache(struct brw_context *brw)
{
   intel_batchbuffer_reset(&brw->batch, brw->bufmgr, brw->has_llc);
   brw_render_cache_set_clear(brw);
}

void
intel_batchbuffer_save_state(struct brw_context *brw)
{
   brw->batch.saved.map_next = brw->batch.map_next;
   brw->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(brw->batch.bo);
}

void
intel_batchbuffer_reset_to_saved(struct brw_context *brw)
{
   drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count);

   brw->batch.map_next = brw->batch.saved.map_next;
   if (USED_BATCH(brw->batch) == 0)
      brw->batch.ring = UNKNOWN_RING;
}

void
intel_batchbuffer_free(struct intel_batchbuffer *batch)
{
   free(batch->cpu_map);
   drm_intel_bo_unreference(batch->last_bo);
   drm_intel_bo_unreference(batch->bo);
}

void
intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
                                enum brw_gpu_ring ring)
{
   /* If we're switching rings, implicitly flush the batch. */
   if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING &&
       brw->gen >= 6) {
      intel_batchbuffer_flush(brw);
   }

#ifdef DEBUG
   assert(sz < BATCH_SZ - BATCH_RESERVED);
#endif
   if (intel_batchbuffer_space(&brw->batch) < sz)
      intel_batchbuffer_flush(brw);

   enum brw_gpu_ring prev_ring = brw->batch.ring;
   /* The intel_batchbuffer_flush() calls above might have changed
    * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end.
    */
   brw->batch.ring = ring;

   if (unlikely(prev_ring == UNKNOWN_RING && ring == RENDER_RING))
      intel_batchbuffer_emit_render_ring_prelude(brw);
}

static void
do_batch_dump(struct brw_context *brw)
{
   struct drm_intel_decode *decode;
   struct intel_batchbuffer *batch = &brw->batch;
   int ret;

   decode = drm_intel_decode_context_alloc(brw->screen->deviceID);
   if (!decode)
      return;

   ret = drm_intel_bo_map(batch->bo, false);
   if (ret == 0) {
      drm_intel_decode_set_batch_pointer(decode,
                                         batch->bo->virtual,
                                         batch->bo->offset64,
                                         USED_BATCH(*batch));
   } else {
      fprintf(stderr,
              "WARNING: failed to map batchbuffer (%s), "
              "dumping uploaded data instead.\n", strerror(ret));

      drm_intel_decode_set_batch_pointer(decode,
                                         batch->map,
                                         batch->bo->offset64,
                                         USED_BATCH(*batch));
   }

   drm_intel_decode_set_output_file(decode, stderr);
   drm_intel_decode(decode);

   drm_intel_decode_context_free(decode);

   if (ret == 0) {
      drm_intel_bo_unmap(batch->bo);

      brw_debug_batch(brw);
   }
}

void
intel_batchbuffer_emit_render_ring_prelude(struct brw_context *brw)
{
   /* Un-used currently */
}

/**
 * Called when starting a new batch buffer.
 */
static void
brw_new_batch(struct brw_context *brw)
{
   /* Create a new batchbuffer and reset the associated state: */
   drm_intel_gem_bo_clear_relocs(brw->batch.bo, 0);
   intel_batchbuffer_reset_and_clear_render_cache(brw);

   /* If the kernel supports hardware contexts, then most hardware state is
    * preserved between batches; we only need to re-emit state that is required
    * to be in every batch. Otherwise we need to re-emit all the state that
    * would otherwise be stored in the context (which for all intents and
    * purposes means everything).
    */
   if (brw->hw_ctx == NULL)
      brw->ctx.NewDriverState |= BRW_NEW_CONTEXT;

   brw->ctx.NewDriverState |= BRW_NEW_BATCH;

   brw->state_batch_count = 0;

   brw->ib.type = -1;

   /* We need to periodically reap the shader time results, because rollover
    * happens every few seconds. We also want to see results every once in a
    * while, because many programs won't cleanly destroy our context, so the
    * end-of-run printout may not happen.
    */
   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_collect_and_report_shader_time(brw);
}

/**
 * Called from intel_batchbuffer_flush before emitting MI_BATCHBUFFER_END and
 * sending it off.
 *
 * This function can emit state (say, to preserve registers that aren't saved
 * between batches). All of this state MUST fit in the reserved space at the
 * end of the batchbuffer. If you add more GPU state, increase the reserved
 * space by updating the BATCH_RESERVED macro.
 */
static void
brw_finish_batch(struct brw_context *brw)
{
   /* Capture the closing pipeline statistics register values necessary to
    * support query objects (in the non-hardware context world).
    */
   brw_emit_query_end(brw);

   if (brw->batch.ring == RENDER_RING) {
      /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which
       * assume that the L3 cache is configured according to the hardware
       * defaults.
       */
      if (brw->gen >= 7)
         gen7_restore_default_l3_config(brw);

      if (brw->is_haswell) {
         /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
          * 3DSTATE_CC_STATE_POINTERS > "Note":
          *
          * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
          * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
          *
          * From the example in the docs, it seems to expect a regular pipe control
          * flush here as well. We may have done it already, but meh.
          *
          * See also WaAvoidRCZCounterRollover.
          */
         brw_emit_mi_flush(brw);
         BEGIN_BATCH(2);
         OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
         OUT_BATCH(brw->cc.state_offset | 1);
         ADVANCE_BATCH();
         brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                          PIPE_CONTROL_CS_STALL);
      }
   }

   /* Mark that the current program cache BO has been used by the GPU.
    * It will be reallocated if we need to put new programs in for the
    * next batch.
    */
   brw->cache.bo_used_by_gpu = true;
}

static void
throttle(struct brw_context *brw)
{
   /* Wait for the swapbuffers before the one we just emitted, so we
    * don't get too many swaps outstanding for apps that are GPU-heavy
    * but not CPU-heavy.
    *
    * We're using intelDRI2Flush (called from the loader before
    * swapbuffer) and glFlush (for front buffer rendering) as the
    * indicator that a frame is done and then throttle when we get
    * here as we prepare to render the next frame. At this point for
    * round trips for swap/copy and getting new buffers are done and
    * we'll spend less time waiting on the GPU.
    *
    * Unfortunately, we don't have a handle to the batch containing
    * the swap, and getting our hands on that doesn't seem worth it,
    * so we just use the first batch we emitted after the last swap.
    */
   if (brw->need_swap_throttle && brw->throttle_batch[0]) {
      if (brw->throttle_batch[1]) {
         if (!brw->disable_throttling)
            drm_intel_bo_wait_rendering(brw->throttle_batch[1]);
         drm_intel_bo_unreference(brw->throttle_batch[1]);
      }
      brw->throttle_batch[1] = brw->throttle_batch[0];
      brw->throttle_batch[0] = NULL;
      brw->need_swap_throttle = false;
      /* Throttling here is more precise than the throttle ioctl, so skip it */
      brw->need_flush_throttle = false;
   }

   if (brw->need_flush_throttle) {
      __DRIscreen *dri_screen = brw->screen->driScrnPriv;
      drmCommandNone(dri_screen->fd, DRM_I915_GEM_THROTTLE);
      brw->need_flush_throttle = false;
   }
}

/* Drop when RS headers get pulled to libdrm */
#ifndef I915_EXEC_RESOURCE_STREAMER
#define I915_EXEC_RESOURCE_STREAMER (1<<15)
#endif

/* TODO: Push this whole function into bufmgr.
 */
static int
do_flush_locked(struct brw_context *brw)
{
   struct intel_batchbuffer *batch = &brw->batch;
   int ret = 0;

   if (brw->has_llc) {
      drm_intel_bo_unmap(batch->bo);
   } else {
      ret = drm_intel_bo_subdata(batch->bo, 0, 4 * USED_BATCH(*batch), batch->map);
      if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
         ret = drm_intel_bo_subdata(batch->bo,
                                    batch->state_batch_offset,
                                    batch->bo->size - batch->state_batch_offset,
                                    (char *)batch->map + batch->state_batch_offset);
      }
   }

   if (!brw->screen->no_hw) {
      int flags;

      if (brw->gen >= 6 && batch->ring == BLT_RING) {
         flags = I915_EXEC_BLT;
      } else {
         flags = I915_EXEC_RENDER |
            (brw->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0);
      }
      if (batch->needs_sol_reset)
         flags |= I915_EXEC_GEN7_SOL_RESET;

      if (ret == 0) {
         if (unlikely(INTEL_DEBUG & DEBUG_AUB))
            brw_annotate_aub(brw);

         if (brw->hw_ctx == NULL || batch->ring != RENDER_RING) {
            ret = drm_intel_bo_mrb_exec(batch->bo, 4 * USED_BATCH(*batch),
                                        NULL, 0, 0, flags);
         } else {
            ret = drm_intel_gem_bo_context_exec(batch->bo, brw->hw_ctx,
                                                4 * USED_BATCH(*batch), flags);
         }
      }

      throttle(brw);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      do_batch_dump(brw);

   if (brw->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
      brw_check_for_reset(brw);

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }

   return ret;
}

int
_intel_batchbuffer_flush(struct brw_context *brw,
                         const char *file, int line)
{
   int ret;

   if (USED_BATCH(brw->batch) == 0)
      return 0;

   if (brw->throttle_batch[0] == NULL) {
      brw->throttle_batch[0] = brw->batch.bo;
      drm_intel_bo_reference(brw->throttle_batch[0]);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      int bytes_for_commands = 4 * USED_BATCH(brw->batch);
      int bytes_for_state = brw->batch.bo->size - brw->batch.state_batch_offset;
      int total_bytes = bytes_for_commands + bytes_for_state;
      fprintf(stderr, "%s:%d: Batchbuffer flush with %4db (pkt) + "
              "%4db (state) = %4db (%0.1f%%)\n", file, line,
              bytes_for_commands, bytes_for_state,
              total_bytes,
              100.0f * total_bytes / BATCH_SZ);
   }

   brw->batch.reserved_space = 0;

   brw_finish_batch(brw);

   /* Mark the end of the buffer.
    */
   intel_batchbuffer_emit_dword(&brw->batch, MI_BATCH_BUFFER_END);
   if (USED_BATCH(brw->batch) & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(&brw->batch, MI_NOOP);
   }

   intel_upload_finish(brw);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!brw->no_batch_wrap);

   ret = do_flush_locked(brw);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(brw->batch.bo);
   }

   if (brw->use_resource_streamer)
      gen7_reset_hw_bt_pool_offsets(brw);

   /* Start a new batch buffer. */
   brw_new_batch(brw);

   return ret;
}


/* This is the only way buffers get added to the validate list.
 */
uint32_t
intel_batchbuffer_reloc(struct intel_batchbuffer *batch,
                        drm_intel_bo *buffer, uint32_t offset,
                        uint32_t read_domains, uint32_t write_domain,
                        uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(batch->bo, offset,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /* Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel
    */
   return buffer->offset64 + delta;
}

uint64_t
intel_batchbuffer_reloc64(struct intel_batchbuffer *batch,
                          drm_intel_bo *buffer, uint32_t offset,
                          uint32_t read_domains, uint32_t write_domain,
                          uint32_t delta)
{
   int ret = drm_intel_bo_emit_reloc(batch->bo, offset,
                                     buffer, delta,
                                     read_domains, write_domain);
   assert(ret == 0);
   (void) ret;

   /* Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel
    */
   return buffer->offset64 + delta;
}


void
intel_batchbuffer_data(struct brw_context *brw,
                       const void *data, GLuint bytes, enum brw_gpu_ring ring)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(brw, bytes, ring);
   memcpy(brw->batch.map_next, data, bytes);
   brw->batch.map_next += bytes >> 2;
}

static void
load_sized_register_mem(struct brw_context *brw,
                        uint32_t reg,
                        drm_intel_bo *bo,
                        uint32_t read_domains, uint32_t write_domain,
                        uint32_t offset,
                        int size)
{
   int i;

   /* MI_LOAD_REGISTER_MEM only exists on Gen7+.
    */
   assert(brw->gen >= 7);

   if (brw->gen >= 8) {
      BEGIN_BATCH(4 * size);
      for (i = 0; i < size; i++) {
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2));
         OUT_BATCH(reg + i * 4);
         OUT_RELOC64(bo, read_domains, write_domain, offset + i * 4);
      }
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(3 * size);
      for (i = 0; i < size; i++) {
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(reg + i * 4);
         OUT_RELOC(bo, read_domains, write_domain, offset + i * 4);
      }
      ADVANCE_BATCH();
   }
}

void
brw_load_register_mem(struct brw_context *brw,
                      uint32_t reg,
                      drm_intel_bo *bo,
                      uint32_t read_domains, uint32_t write_domain,
                      uint32_t offset)
{
   load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 1);
}

void
brw_load_register_mem64(struct brw_context *brw,
                        uint32_t reg,
                        drm_intel_bo *bo,
                        uint32_t read_domains, uint32_t write_domain,
                        uint32_t offset)
{
   load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 2);
}

/*
 * Write an arbitrary 32-bit register to a buffer via MI_STORE_REGISTER_MEM.
 */
void
brw_store_register_mem32(struct brw_context *brw,
                         drm_intel_bo *bo, uint32_t reg, uint32_t offset)
{
   assert(brw->gen >= 6);

   if (brw->gen >= 8) {
      BEGIN_BATCH(4);
      OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
      OUT_BATCH(reg);
      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  offset);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(3);
      OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
      OUT_BATCH(reg);
      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                offset);
      ADVANCE_BATCH();
   }
}

/*
 * Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM.
 */
void
brw_store_register_mem64(struct brw_context *brw,
                         drm_intel_bo *bo, uint32_t reg, uint32_t offset)
{
   assert(brw->gen >= 6);

   /* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to
    * read a full 64-bit register, we need to do two of them.
    */
   if (brw->gen >= 8) {
      BEGIN_BATCH(8);
      OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
      OUT_BATCH(reg);
      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  offset);
      OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
      OUT_BATCH(reg + sizeof(uint32_t));
      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  offset + sizeof(uint32_t));
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
      OUT_BATCH(reg);
      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                offset);
      OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
      OUT_BATCH(reg + sizeof(uint32_t));
      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                offset + sizeof(uint32_t));
      ADVANCE_BATCH();
   }
}

/*
 * Write a 32-bit register using immediate data.
 */
void
brw_load_register_imm32(struct brw_context *brw, uint32_t reg, uint32_t imm)
{
   assert(brw->gen >= 6);

   BEGIN_BATCH(3);
   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
   OUT_BATCH(reg);
   OUT_BATCH(imm);
   ADVANCE_BATCH();
}

/*
 * Write a 64-bit register using immediate data.
 */
void
brw_load_register_imm64(struct brw_context *brw, uint32_t reg, uint64_t imm)
{
   assert(brw->gen >= 6);

   BEGIN_BATCH(5);
   OUT_BATCH(MI_LOAD_REGISTER_IMM | (5 - 2));
   OUT_BATCH(reg);
   OUT_BATCH(imm & 0xffffffff);
   OUT_BATCH(reg + 4);
   OUT_BATCH(imm >> 32);
   ADVANCE_BATCH();
}

/*
 * Copies a 32-bit register.
 */
void
brw_load_register_reg(struct brw_context *brw, uint32_t src, uint32_t dest)
{
   assert(brw->gen >= 8 || brw->is_haswell);

   BEGIN_BATCH(3);
   OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
   OUT_BATCH(src);
   OUT_BATCH(dest);
   ADVANCE_BATCH();
}

/*
 * Copies a 64-bit register.
 */
void
brw_load_register_reg64(struct brw_context *brw, uint32_t src, uint32_t dest)
{
   assert(brw->gen >= 8 || brw->is_haswell);

   BEGIN_BATCH(6);
   OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
   OUT_BATCH(src);
   OUT_BATCH(dest);
   OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
   OUT_BATCH(src + sizeof(uint32_t));
   OUT_BATCH(dest + sizeof(uint32_t));
   ADVANCE_BATCH();
}

/*
 * Write 32-bits of immediate data to a GPU memory buffer.
 */
void
brw_store_data_imm32(struct brw_context *brw, drm_intel_bo *bo,
                     uint32_t offset, uint32_t imm)
{
   assert(brw->gen >= 6);

   BEGIN_BATCH(4);
   OUT_BATCH(MI_STORE_DATA_IMM | (4 - 2));
   if (brw->gen >= 8)
      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  offset);
   else {
      OUT_BATCH(0); /* MBZ */
      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                offset);
   }
   OUT_BATCH(imm);
   ADVANCE_BATCH();
}

/*
 * Write 64-bits of immediate data to a GPU memory buffer.
 */
void
brw_store_data_imm64(struct brw_context *brw, drm_intel_bo *bo,
                     uint32_t offset, uint64_t imm)
{
   assert(brw->gen >= 6);

   BEGIN_BATCH(5);
   OUT_BATCH(MI_STORE_DATA_IMM | (5 - 2));
   if (brw->gen >= 8)
      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  offset);
   else {
      OUT_BATCH(0); /* MBZ */
      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                offset);
   }
   OUT_BATCH(imm & 0xffffffffu);
   OUT_BATCH(imm >> 32);
   ADVANCE_BATCH();
}