/*
 * Copyright 2010 Jerome Glisse <glisse (at) freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Jerome Glisse
 */
#include "r600_pipe.h"
#include "r600d.h"
#include "util/u_memory.h"
#include <errno.h>
#include <unistd.h>


void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
			boolean count_draw_in)
{
	/* Flush the DMA IB if it's not empty. */
	if (radeon_emitted(ctx->b.dma.cs, 0))
		ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);

	if (!radeon_cs_memory_below_limit(ctx->b.screen, ctx->b.gfx.cs,
					  ctx->b.vram, ctx->b.gtt)) {
		ctx->b.gtt = 0;
		ctx->b.vram = 0;
		ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
		return;
	}
	/* all will be accounted once relocations are emitted */
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* Check available space in CS. */
	if (count_draw_in) {
		uint64_t mask;

		/* The number of dwords all the dirty states would take. */
		mask = ctx->dirty_atoms;
		while (mask != 0)
			num_dw += ctx->atoms[u_bit_scan64(&mask)]->num_dw;

		/* The upper-bound of how much space a draw command would take. */
		num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
	}

	/* Count in r600_suspend_queries. */
	num_dw += ctx->b.num_cs_dw_queries_suspend;

	/* Count in streamout_end at the end of CS. */
	if (ctx->b.streamout.begin_emitted) {
		num_dw += ctx->b.streamout.num_dw_for_end;
	}

	/* SX_MISC */
	if (ctx->b.chip_class == R600) {
		num_dw += 3;
	}

	/* Count in framebuffer cache flushes at the end of CS. */
	num_dw += R600_MAX_FLUSH_CS_DWORDS;

	/* The fence at the end of CS. */
	num_dw += 10;

	/* Flush if there's not enough space. */
	if (!ctx->b.ws->cs_check_space(ctx->b.gfx.cs, num_dw)) {
		ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}
}

void r600_flush_emit(struct r600_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
	unsigned cp_coher_cntl = 0;
	unsigned wait_until = 0;

	if (!rctx->b.flags) {
		return;
	}
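	/* Everything below is driven by the R600_CONTEXT_* bits in
	 * rctx->b.flags: they are translated into event writes, WAIT_UNTIL
	 * register writes and a SURFACE_SYNC packet, and the flags are
	 * cleared once everything has been emitted. */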
	/* Ensure coherency between streamout and shaders. */
	if (rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH)
		rctx->b.flags |= r600_get_flush_flags(R600_COHERENCY_SHADER);

	if (rctx->b.flags & R600_CONTEXT_WAIT_3D_IDLE) {
		wait_until |= S_008040_WAIT_3D_IDLE(1);
	}
	if (rctx->b.flags & R600_CONTEXT_WAIT_CP_DMA_IDLE) {
		wait_until |= S_008040_WAIT_CP_DMA_IDLE(1);
	}

	if (wait_until) {
		/* Use of WAIT_UNTIL is deprecated on Cayman+ */
		if (rctx->b.family >= CHIP_CAYMAN) {
			/* emit a PS partial flush on Cayman/TN */
			rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
		}
	}

	/* Wait packets must be executed first, because SURFACE_SYNC doesn't
	 * wait for shaders if it's not flushing CB or DB.
	 */
	if (rctx->b.flags & R600_CONTEXT_PS_PARTIAL_FLUSH) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
	}

	if (wait_until) {
		/* Use of WAIT_UNTIL is deprecated on Cayman+ */
		if (rctx->b.family < CHIP_CAYMAN) {
			/* wait for things to settle */
			radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
		}
	}

	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
	}

	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_DB_META)) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));

		/* Set FULL_CACHE_ENA for DB META flushes on r7xx and later.
		 *
		 * This hack predates use of FLUSH_AND_INV_DB_META, so it's
		 * unclear whether it's still needed or even whether it has
		 * any effect.
		 */
		cp_coher_cntl |= S_0085F0_FULL_CACHE_ENA(1);
	}

	if (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV ||
	    (rctx->b.chip_class == R600 && rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH)) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0));
	}

	if (rctx->b.flags & R600_CONTEXT_INV_CONST_CACHE) {
		/* Direct constant addressing uses the shader cache.
		 * Indirect constant addressing uses the vertex cache. */
		cp_coher_cntl |= S_0085F0_SH_ACTION_ENA(1) |
				 (rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1)
							 : S_0085F0_TC_ACTION_ENA(1));
	}
	if (rctx->b.flags & R600_CONTEXT_INV_VERTEX_CACHE) {
		cp_coher_cntl |= rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1)
							: S_0085F0_TC_ACTION_ENA(1);
	}
	if (rctx->b.flags & R600_CONTEXT_INV_TEX_CACHE) {
		/* Textures use the texture cache.
		 * Texture buffer objects use the vertex cache. */
		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1) |
				 (rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1) : 0);
	}

	/* Don't use the DB CP COHER logic on r6xx.
	 * There are hw bugs.
	 */
	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_DB)) {
		cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
				 S_0085F0_DB_DEST_BASE_ENA(1) |
				 S_0085F0_SMX_ACTION_ENA(1);
	}
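	/* Note: the blocks above and below only accumulate bits in
	 * cp_coher_cntl; the SURFACE_SYNC packet that carries them is
	 * emitted further down, once all flags have been examined. */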
	/* Don't use the CB CP COHER logic on r6xx.
	 * There are hw bugs.
	 */
	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_CB)) {
		cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
				 S_0085F0_CB0_DEST_BASE_ENA(1) |
				 S_0085F0_CB1_DEST_BASE_ENA(1) |
				 S_0085F0_CB2_DEST_BASE_ENA(1) |
				 S_0085F0_CB3_DEST_BASE_ENA(1) |
				 S_0085F0_CB4_DEST_BASE_ENA(1) |
				 S_0085F0_CB5_DEST_BASE_ENA(1) |
				 S_0085F0_CB6_DEST_BASE_ENA(1) |
				 S_0085F0_CB7_DEST_BASE_ENA(1) |
				 S_0085F0_SMX_ACTION_ENA(1);
		if (rctx->b.chip_class >= EVERGREEN)
			cp_coher_cntl |= S_0085F0_CB8_DEST_BASE_ENA(1) |
					 S_0085F0_CB9_DEST_BASE_ENA(1) |
					 S_0085F0_CB10_DEST_BASE_ENA(1) |
					 S_0085F0_CB11_DEST_BASE_ENA(1);
	}

	if (rctx->b.chip_class >= R700 &&
	    rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH) {
		cp_coher_cntl |= S_0085F0_SO0_DEST_BASE_ENA(1) |
				 S_0085F0_SO1_DEST_BASE_ENA(1) |
				 S_0085F0_SO2_DEST_BASE_ENA(1) |
				 S_0085F0_SO3_DEST_BASE_ENA(1) |
				 S_0085F0_SMX_ACTION_ENA(1);
	}

	/* Workaround for buggy flushing on some R6xx chipsets. */
	if ((rctx->b.flags & (R600_CONTEXT_FLUSH_AND_INV |
			      R600_CONTEXT_STREAMOUT_FLUSH)) &&
	    (rctx->b.family == CHIP_RV670 ||
	     rctx->b.family == CHIP_RS780 ||
	     rctx->b.family == CHIP_RS880)) {
		cp_coher_cntl |= S_0085F0_CB1_DEST_BASE_ENA(1) |
				 S_0085F0_DEST_BASE_0_ENA(1);
	}

	if (cp_coher_cntl) {
		radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
		radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
		radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
		radeon_emit(cs, 0);             /* CP_COHER_BASE */
		radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
	}

	if (rctx->b.flags & R600_CONTEXT_START_PIPELINE_STATS) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) |
				EVENT_INDEX(0));
	} else if (rctx->b.flags & R600_CONTEXT_STOP_PIPELINE_STATS) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_STOP) |
				EVENT_INDEX(0));
	}

	/* everything is properly flushed */
	rctx->b.flags = 0;
}

void r600_context_gfx_flush(void *context, unsigned flags,
			    struct pipe_fence_handle **fence)
{
	struct r600_context *ctx = context;
	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
	struct radeon_winsys *ws = ctx->b.ws;

	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
		return;

	if (r600_check_device_reset(&ctx->b))
		return;

	r600_preflush_suspend_features(&ctx->b);

	/* flush the framebuffer cache */
	ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV |
			R600_CONTEXT_FLUSH_AND_INV_CB |
			R600_CONTEXT_FLUSH_AND_INV_DB |
			R600_CONTEXT_FLUSH_AND_INV_CB_META |
			R600_CONTEXT_FLUSH_AND_INV_DB_META |
			R600_CONTEXT_WAIT_3D_IDLE |
			R600_CONTEXT_WAIT_CP_DMA_IDLE;

	r600_flush_emit(ctx);

	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
	if (ctx->b.chip_class == R600) {
		radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
	}

	/* Flush the CS. */
	ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
	if (fence)
		ws->fence_reference(fence, ctx->b.last_gfx_fence);
	ctx->b.num_gfx_cs_flushes++;

	r600_begin_new_cs(ctx);
}

void r600_begin_new_cs(struct r600_context *ctx)
{
	unsigned shader;

	ctx->b.flags = 0;
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* Begin a new CS. */
	r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd);
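	/* The contents of the previous command stream are no longer
	 * available to the GPU after a flush, so every piece of persistent
	 * state has to be marked dirty and re-emitted into the new CS
	 * before the next draw. */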
	/* Re-emit states. */
	r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->blend_color.atom);
	r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_PS].atom);
	r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
	ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	r600_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
	ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	r600_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
	if (ctx->b.chip_class <= EVERGREEN) {
		r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
	r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom);
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_ES].atom);
	r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom);
	if (ctx->gs_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_GS].atom);
		r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom);
	}
	if (ctx->tes_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_HS].atom);
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_LS].atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_VS].atom);
	r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
	r600_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);

	if (ctx->blend_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
	if (ctx->dsa_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom);
	if (ctx->rasterizer_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom);

	if (ctx->b.chip_class <= R700) {
		r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
	}

	ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
	r600_vertex_buffers_dirty(ctx);

	/* Re-emit shader resources. */
	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
		struct r600_constbuf_state *constbuf = &ctx->constbuf_state[shader];
		struct r600_textures_info *samplers = &ctx->samplers[shader];

		constbuf->dirty_mask = constbuf->enabled_mask;
		samplers->views.dirty_mask = samplers->views.enabled_mask;
		samplers->states.dirty_mask = samplers->states.enabled_mask;

		r600_constant_buffers_dirty(ctx, constbuf);
		r600_sampler_views_dirty(ctx, &samplers->views);
		r600_sampler_states_dirty(ctx, &samplers->states);
	}

	r600_postflush_resume_features(&ctx->b);

	/* Re-emit the draw state. */
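	/* -1 never matches a valid value, so the comparisons in the draw
	 * path will re-emit the primitive type and start instance on the
	 * first draw of the new CS. */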
	ctx->last_primitive_type = -1;
	ctx->last_start_instance = -1;

	assert(!ctx->b.gfx.cs->prev_dw);
	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
}

void r600_emit_pfp_sync_me(struct r600_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;

	if (rctx->b.chip_class >= EVERGREEN &&
	    rctx->b.screen->info.drm_minor >= 46) {
		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
		radeon_emit(cs, 0);
	} else {
		/* Emulate PFP_SYNC_ME by writing a value to memory in ME and
		 * waiting for it in PFP.
		 */
		struct r600_resource *buf = NULL;
		unsigned offset, reloc;
		uint64_t va;

		/* 16-byte address alignment is required by WAIT_REG_MEM. */
		u_suballocator_alloc(rctx->b.allocator_zeroed_memory, 4, 16,
				     &offset, (struct pipe_resource**)&buf);
		if (!buf) {
			/* This is too heavyweight, but will work. */
			rctx->b.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
			return;
		}

		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, buf,
						  RADEON_USAGE_READWRITE,
						  RADEON_PRIO_FENCE);

		va = buf->gpu_address + offset;
		assert(va % 16 == 0);

		/* Write 1 to memory in ME. */
		radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
		radeon_emit(cs, va);
		radeon_emit(cs, ((va >> 32) & 0xff) | MEM_WRITE_32_BITS);
		radeon_emit(cs, 1);
		radeon_emit(cs, 0);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, reloc);

		/* Wait in PFP (PFP can only do GEQUAL against memory). */
		radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
		radeon_emit(cs, WAIT_REG_MEM_GEQUAL |
				WAIT_REG_MEM_MEMORY |
				WAIT_REG_MEM_PFP);
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);
		radeon_emit(cs, 1); /* reference value */
		radeon_emit(cs, 0xffffffff); /* mask */
		radeon_emit(cs, 4); /* poll interval */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, reloc);

		r600_resource_reference(&buf, NULL);
	}
}

/* The max number of bytes to copy per packet. */
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)

void r600_cp_dma_copy_buffer(struct r600_context *rctx,
			     struct pipe_resource *dst, uint64_t dst_offset,
			     struct pipe_resource *src, uint64_t src_offset,
			     unsigned size)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;

	assert(size);
	assert(rctx->screen->b.has_cp_dma);

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
		       dst_offset + size);

	dst_offset += r600_resource(dst)->gpu_address;
	src_offset += r600_resource(src)->gpu_address;

	/* Flush the caches where the resources are bound. */
	rctx->b.flags |= r600_get_flush_flags(R600_COHERENCY_SHADER) |
			 R600_CONTEXT_WAIT_3D_IDLE;

	/* There are differences between R700 and EG in CP DMA,
	 * but we only use the common bits here. */
	while (size) {
		unsigned sync = 0;
		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
		unsigned src_reloc, dst_reloc;

		r600_need_cs_space(rctx,
				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
				   3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);

		/* Flush the caches for the first copy only. */
		if (rctx->b.flags) {
			r600_flush_emit(rctx);
		}
		/* Do the synchronization after the last copy, so that all data is written to memory. */
		if (size == byte_count) {
			sync = PKT3_CP_DMA_CP_SYNC;
		}

		/* This must be done after r600_need_cs_space. */
		src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)src,
						      RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
		dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst,
						      RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);

		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
		radeon_emit(cs, src_offset);	/* SRC_ADDR_LO [31:0] */
		radeon_emit(cs, sync | ((src_offset >> 32) & 0xff));	/* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
		radeon_emit(cs, dst_offset);	/* DST_ADDR_LO [31:0] */
		radeon_emit(cs, (dst_offset >> 32) & 0xff);	/* DST_ADDR_HI [7:0] */
		radeon_emit(cs, byte_count);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, src_reloc);
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, dst_reloc);

		size -= byte_count;
		src_offset += byte_count;
		dst_offset += byte_count;
	}

	/* CP_DMA_CP_SYNC doesn't wait for idle on R6xx, but this does. */
	if (rctx->b.chip_class == R600)
		radeon_set_config_reg(cs, R_008040_WAIT_UNTIL,
				      S_008040_WAIT_CP_DMA_IDLE(1));

	/* CP DMA is executed in ME, but index buffers are read by PFP.
	 * This ensures that ME (CP DMA) is idle before PFP starts fetching
	 * indices. If we wanted to execute CP DMA in PFP, this packet
	 * should precede it.
	 */
	r600_emit_pfp_sync_me(rctx);
}

void r600_dma_copy_buffer(struct r600_context *rctx,
			  struct pipe_resource *dst,
			  struct pipe_resource *src,
			  uint64_t dst_offset,
			  uint64_t src_offset,
			  uint64_t size)
{
	struct radeon_winsys_cs *cs = rctx->b.dma.cs;
	unsigned i, ncopy, csize;
	struct r600_resource *rdst = (struct r600_resource*)dst;
	struct r600_resource *rsrc = (struct r600_resource*)src;

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&rdst->valid_buffer_range, dst_offset,
		       dst_offset + size);

	size >>= 2; /* convert to dwords */
	ncopy = (size / R600_DMA_COPY_MAX_SIZE_DW) + !!(size % R600_DMA_COPY_MAX_SIZE_DW);

	r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc);
	for (i = 0; i < ncopy; i++) {
		csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
		/* emit reloc before writing cs so that cs is always in consistent state */
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ,
					  RADEON_PRIO_SDMA_BUFFER);
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE,
					  RADEON_PRIO_SDMA_BUFFER);
		radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize));
		radeon_emit(cs, dst_offset & 0xfffffffc);
		radeon_emit(cs, src_offset & 0xfffffffc);
		radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
		radeon_emit(cs, (src_offset >> 32UL) & 0xff);
		dst_offset += csize << 2;
		src_offset += csize << 2;
		size -= csize;
	}
}