/*
 * Meta operations for filling and copying buffers on radv.
 *
 * Two paths exist for each operation: a compute-shader path used for
 * large, dword-aligned ranges, and a CP DMA / WRITE_DATA fallback for
 * small or unaligned ones (see radv_fill_buffer / radv_copy_buffer).
 */
#include "radv_meta.h"
#include "nir/nir_builder.h"

#include "sid.h"
#include "radv_cs.h"

/*
 * Build a compute shader that fills an SSBO (set 0, binding 0) with a
 * 32-bit value read from push-constant offset 0.
 *
 * Each invocation stores one 16-byte vec4, so a 64-lane workgroup
 * covers 1024 bytes; the dispatch in fill_buffer_shader() is sized
 * accordingly.
 */
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill");
	b.shader->info->cs.local_size[0] = 64;
	b.shader->info->cs.local_size[1] = 1;
	b.shader->info->cs.local_size[2] = 1;

	/* global id = workgroup id * workgroup size + local invocation id */
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info->cs.local_size[0],
						b.shader->info->cs.local_size[1],
						b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Byte offset for this invocation's 16-byte store; only the x
	 * component of the id vector is meaningful, so narrow to a scalar. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	/* Destination buffer descriptor: set 0, binding 0. */
	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
								  nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	/* Fill value: one 32-bit word from push-constant offset 0 (matches
	 * the 4-byte push-constant range in the fill pipeline layout). */
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

	/* Broadcast the scalar fill value across all four store components. */
	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false);

	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(swizzled_load);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}

/*
 * Build a compute shader that copies from an SSBO at set 0, binding 1
 * into an SSBO at set 0, binding 0.
 *
 * Same addressing scheme as the fill shader: one vec4 (16 bytes) per
 * invocation, 1024 bytes per 64-lane workgroup, with identical source
 * and destination offsets.
 */
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy");
	b.shader->info->cs.local_size[0] = 64;
	b.shader->info->cs.local_size[1] = 1;
	b.shader->info->cs.local_size[2] = 1;

	/* global id = workgroup id * workgroup size + local invocation id */
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info->cs.local_size[0],
						b.shader->info->cs.local_size[1],
						b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* 16 bytes per invocation; narrow the id vector to a scalar offset. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	/* Destination buffer descriptor: set 0, binding 0. */
	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
								  nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	/* Source buffer descriptor: set 0, binding 1. */
	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
								  nir_intrinsic_vulkan_resource_index);
	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(src_buf, 0);
	nir_intrinsic_set_binding(src_buf, 1);
	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &src_buf->instr);

	/* Load a vec4 from the source at this invocation's offset... */
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
	load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
	load->src[1] = nir_src_for_ssa(offset);
	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
	load->num_components = 4;
	nir_builder_instr_insert(&b, &load->instr);

	/* ...and store it to the destination at the same offset. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}



/*
 * Create all meta-buffer state on the device: descriptor set layouts,
 * pipeline layouts (the fill layout carries a 4-byte push-constant
 * range for the fill value) and the two compute pipelines.
 *
 * The NIR shader modules are only needed during pipeline creation and
 * are freed on both the success and failure paths.  On failure,
 * radv_device_finish_meta_buffer_state() tears down whatever was
 * created so far (it is safe on partially-initialized state).
 */
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module fill_cs = { .nir = NULL };
	struct radv_shader_module copy_cs = { .nir = NULL };

	zero(device->meta_state.buffer);

	fill_cs.nir = build_buffer_fill_shader(device);
	copy_cs.nir = build_buffer_copy_shader(device);

	/* Fill: single storage-buffer binding (the destination). */
	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&fill_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.fill_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* Copy: binding 0 is the destination, binding 1 the source —
	 * matching the bindings emitted by build_buffer_copy_shader(). */
	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&copy_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.copy_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	/* 4-byte push-constant range = the 32-bit fill value. */
	VkPipelineLayoutCreateInfo fill_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &fill_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.fill_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineLayoutCreateInfo copy_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
		.pushConstantRangeCount = 0,
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &copy_pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.buffer.copy_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&fill_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = fill_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.fill_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &fill_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.fill_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&copy_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = copy_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.copy_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &copy_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.copy_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	/* Shaders are baked into the pipelines now; free the NIR. */
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return VK_SUCCESS;
fail:
	radv_device_finish_meta_buffer_state(device);
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return result;
}

/*
 * Destroy all meta-buffer state.  Each handle is checked so this is
 * safe to call on partially-initialized state (the init failure path
 * relies on that).
 */
void radv_device_finish_meta_buffer_state(struct radv_device *device)
{
	if (device->meta_state.buffer.copy_pipeline)
		radv_DestroyPipeline(radv_device_to_handle(device),
				     device->meta_state.buffer.copy_pipeline,
				     &device->meta_state.alloc);

	if (device->meta_state.buffer.fill_pipeline)
		radv_DestroyPipeline(radv_device_to_handle(device),
				     device->meta_state.buffer.fill_pipeline,
				     &device->meta_state.alloc);

	if (device->meta_state.buffer.copy_p_layout)
		radv_DestroyPipelineLayout(radv_device_to_handle(device),
					   device->meta_state.buffer.copy_p_layout,
					   &device->meta_state.alloc);

	if (device->meta_state.buffer.fill_p_layout)
		radv_DestroyPipelineLayout(radv_device_to_handle(device),
					   device->meta_state.buffer.fill_p_layout,
					   &device->meta_state.alloc);

	if (device->meta_state.buffer.copy_ds_layout)
		radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
						device->meta_state.buffer.copy_ds_layout,
						&device->meta_state.alloc);

	if (device->meta_state.buffer.fill_ds_layout)
		radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
						device->meta_state.buffer.fill_ds_layout,
						&device->meta_state.alloc);
}

/*
 * Fill [offset, offset+size) of bo with `value` using the fill compute
 * pipeline.  Caller guarantees dword alignment (see radv_fill_buffer).
 *
 * One workgroup covers 1024 bytes (64 lanes x 16 bytes), hence the
 * round_up_u64(size, 1024) dispatch count (presumably a round-up
 * division — TODO confirm against the helper's definition).  Saves and
 * restores the app's compute state including the 4 push-constant bytes
 * this path clobbers.
 */
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *bo,
			       uint64_t offset, uint64_t size, uint32_t value)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_compute_state saved_state;
	VkDescriptorSet ds;

	radv_meta_save_compute(&saved_state, cmd_buffer, 4);

	radv_temp_descriptor_set_create(device, cmd_buffer,
					device->meta_state.buffer.fill_ds_layout,
					&ds);

	/* Wrap the raw bo in a temporary radv_buffer so it can be bound
	 * through the normal descriptor path. */
	struct radv_buffer dst_buffer = {
		.bo = bo,
		.offset = offset,
		.size = size
	};

	radv_UpdateDescriptorSets(radv_device_to_handle(device),
				  1, /* writeCount */
				  (VkWriteDescriptorSet[]) {
					  {
						  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						  .dstSet = ds,
						  .dstBinding = 0,
						  .dstArrayElement = 0,
						  .descriptorCount = 1,
						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						  .pBufferInfo = &(VkDescriptorBufferInfo) {
							  .buffer = radv_buffer_to_handle(&dst_buffer),
							  .offset = 0,
							  .range = size
						  }
					  }
				  }, 0, NULL);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.fill_pipeline);

	radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
				   VK_PIPELINE_BIND_POINT_COMPUTE,
				   device->meta_state.buffer.fill_p_layout, 0, 1,
				   &ds, 0, NULL);

	/* The fill value the shader reads from push-constant offset 0. */
	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.buffer.fill_p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
			      &value);

	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_temp_descriptor_set_destroy(device, ds);

	radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
}

/*
 * Copy `size` bytes from src_bo+src_offset to dst_bo+dst_offset with
 * the copy compute pipeline.  Caller guarantees dword alignment (see
 * radv_copy_buffer).  Same 1024-bytes-per-workgroup dispatch sizing as
 * fill_buffer_shader(); no push constants are used here, hence the 0
 * passed to save/restore.
 */
static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
			       struct radeon_winsys_bo *src_bo,
			       struct radeon_winsys_bo *dst_bo,
			       uint64_t src_offset, uint64_t dst_offset,
			       uint64_t size)
{
	struct radv_device *device = cmd_buffer->device;
	uint64_t block_count = round_up_u64(size, 1024);
	struct radv_meta_saved_compute_state saved_state;
	VkDescriptorSet ds;

	radv_meta_save_compute(&saved_state, cmd_buffer, 0);

	radv_temp_descriptor_set_create(device, cmd_buffer,
					device->meta_state.buffer.copy_ds_layout,
					&ds);

	/* Temporary radv_buffer wrappers around the raw bos. */
	struct radv_buffer dst_buffer = {
		.bo = dst_bo,
		.offset = dst_offset,
		.size = size
	};

	struct radv_buffer src_buffer = {
		.bo = src_bo,
		.offset = src_offset,
		.size = size
	};

	/* Binding 0 = destination, binding 1 = source, matching the
	 * copy shader and descriptor set layout. */
	radv_UpdateDescriptorSets(radv_device_to_handle(device),
				  2, /* writeCount */
				  (VkWriteDescriptorSet[]) {
					  {
						  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						  .dstSet = ds,
						  .dstBinding = 0,
						  .dstArrayElement = 0,
						  .descriptorCount = 1,
						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						  .pBufferInfo = &(VkDescriptorBufferInfo) {
							  .buffer = radv_buffer_to_handle(&dst_buffer),
							  .offset = 0,
							  .range = size
						  }
					  },
					  {
						  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						  .dstSet = ds,
						  .dstBinding = 1,
						  .dstArrayElement = 0,
						  .descriptorCount = 1,
						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
						  .pBufferInfo = &(VkDescriptorBufferInfo) {
							  .buffer = radv_buffer_to_handle(&src_buffer),
							  .offset = 0,
							  .range = size
						  }
					  }
				  }, 0, NULL);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE,
			     device->meta_state.buffer.copy_pipeline);

	radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
				   VK_PIPELINE_BIND_POINT_COMPUTE,
				   device->meta_state.buffer.copy_p_layout, 0, 1,
				   &ds, 0, NULL);


	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

	radv_temp_descriptor_set_destroy(device, ds);

	radv_meta_restore_compute(&saved_state, cmd_buffer, 0);
}


/*
 * Fill a buffer range with a 32-bit value.  Offset and size must be
 * dword-aligned.  Sizes >= 4096 bytes go through the compute pipeline;
 * smaller non-zero sizes use a CP DMA clear (the 8 passed to
 * cs_add_buffer appears to be a winsys buffer priority — TODO confirm).
 */
void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
		      struct radeon_winsys_bo *bo,
		      uint64_t offset, uint64_t size, uint32_t value)
{
	assert(!(offset & 3));
	assert(!(size & 3));

	if (size >= 4096)
		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
	else if (size) {
		uint64_t va = cmd_buffer->device->ws->buffer_get_va(bo);
		va += offset;
		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, bo, 8);
		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
	}
}

/*
 * Copy between two bos.  The compute path is only used when the size
 * and both offsets are dword-aligned and the copy is >= 4096 bytes;
 * everything else (including unaligned copies) falls back to CP DMA.
 */
static
void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
		      struct radeon_winsys_bo *src_bo,
		      struct radeon_winsys_bo *dst_bo,
		      uint64_t src_offset, uint64_t dst_offset,
		      uint64_t size)
{
	if (size >= 4096 && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
				   src_offset, dst_offset, size);
	else if (size) {
		uint64_t src_va = cmd_buffer->device->ws->buffer_get_va(src_bo);
		uint64_t dst_va = cmd_buffer->device->ws->buffer_get_va(dst_bo);
		src_va += src_offset;
		dst_va += dst_offset;

		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, src_bo, 8);
		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_bo, 8);

		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
	}
}

/* vkCmdFillBuffer entry point.  VK_WHOLE_SIZE is resolved to the
 * remaining buffer size rounded down to a dword multiple, per spec. */
void radv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);

	if (fillSize == VK_WHOLE_SIZE)
		fillSize = (dst_buffer->size - dstOffset) & ~3ull;

	radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
			 fillSize, data);
}

/* vkCmdCopyBuffer entry point: dispatch each region independently. */
void radv_CmdCopyBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    srcBuffer,
	VkBuffer                                    destBuffer,
	uint32_t                                    regionCount,
	const VkBufferCopy*                         pRegions)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
	RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);

	for (unsigned r = 0; r < regionCount; r++) {
		uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
		uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
		uint64_t copy_size = pRegions[r].size;

		radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
				 src_offset, dest_offset, copy_size);
	}
}

/*
 * vkCmdUpdateBuffer entry point.  Small updates (< 4096 bytes) are
 * written inline into the command stream with a WRITE_DATA packet;
 * larger ones are staged through the command buffer's upload bo and
 * copied with radv_copy_buffer().  dataSize must be a dword multiple
 * (Vulkan valid usage).
 */
void radv_CmdUpdateBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                dataSize,
	const void*                                 pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	uint64_t words = dataSize / 4;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (dataSize < 4096) {
		/* Flush before the CP writes memory directly. */
		si_emit_cache_flush(cmd_buffer);

		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);

		/* words payload dwords + 4 packet-header/address dwords. */
		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		/* WRITE_DATA packet: header, control word, 64-bit dst VA,
		 * then the inline payload.  Dword order is fixed by the
		 * PM4 packet format and must not be changed. */
		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
		                            V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
			    S_370_WR_CONFIRM(1) |
			    S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);
	} else {
		/* Stage the data in the upload bo, then GPU-copy it. */
		uint32_t buf_offset;
		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}