/* radv_meta_buffer.c — meta operations for buffer fill, copy and update (radv Vulkan driver) */
      1 #include "radv_meta.h"
      2 #include "nir/nir_builder.h"
      3 
      4 #include "sid.h"
      5 #include "radv_cs.h"
      6 
/* Build a compute shader that fills a storage buffer with a single
 * 32-bit value taken from the push constants.
 *
 * Each invocation writes one 16-byte (vec4) chunk, so a 64-wide
 * workgroup covers 1024 bytes of the destination buffer.
 */
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill");
	b.shader->info->cs.local_size[0] = 64;
	b.shader->info->cs.local_size[1] = 1;
	b.shader->info->cs.local_size[2] = 1;

	/* global_id = wg_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info->cs.local_size[0],
						b.shader->info->cs.local_size[1],
						b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Byte offset of this invocation's chunk (16 bytes per invocation).
	 * Only the .x component is meaningful; swizzle down to a scalar. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	/* Destination SSBO: descriptor set 0, binding 0. */
	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	/* The fill value lives in the first dword of the push constants. */
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

	/* Replicate the scalar fill value into all four components. */
	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false);

	/* Store the vec4 to the destination buffer at the computed offset. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(swizzled_load);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}
     56 
/* Build a compute shader that copies between two storage buffers in
 * descriptor set 0: binding 0 is the destination, binding 1 the source.
 *
 * Each invocation moves one 16-byte (vec4) chunk, so a 64-wide
 * workgroup covers 1024 bytes.
 */
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy");
	b.shader->info->cs.local_size[0] = 64;
	b.shader->info->cs.local_size[1] = 1;
	b.shader->info->cs.local_size[2] = 1;

	/* global_id = wg_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info->cs.local_size[0],
						b.shader->info->cs.local_size[1],
						b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Byte offset of this invocation's chunk (16 bytes per invocation),
	 * reduced to a scalar; the same offset is used for load and store. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_swizzle(&b, offset, (unsigned[]) {0, 0, 0, 0}, 1, false);

	/* Destination SSBO: descriptor set 0, binding 0. */
	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	/* Source SSBO: descriptor set 0, binding 1. */
	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	nir_intrinsic_set_desc_set(src_buf, 0);
	nir_intrinsic_set_binding(src_buf, 1);
	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
	nir_builder_instr_insert(&b, &src_buf->instr);

	/* Load a vec4 from the source buffer... */
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
	load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
	load->src[1] = nir_src_for_ssa(offset);
	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
	load->num_components = 4;
	nir_builder_instr_insert(&b, &load->instr);

	/* ...and store it to the destination at the same offset. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}
    113 
    114 
    115 
/* Create the meta state used by buffer fill and copy: a descriptor set
 * layout, a pipeline layout and a compute pipeline for each operation.
 * On any failure, everything created so far is torn down via
 * radv_device_finish_meta_buffer_state() before returning the error.
 */
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module fill_cs = { .nir = NULL };
	struct radv_shader_module copy_cs = { .nir = NULL };

	zero(device->meta_state.buffer);

	fill_cs.nir = build_buffer_fill_shader(device);
	copy_cs.nir = build_buffer_copy_shader(device);

	/* Fill uses a single SSBO binding: the destination buffer
	 * (binding 0, matching build_buffer_fill_shader). */
	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&fill_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.fill_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* Copy uses two SSBO bindings: destination (0) and source (1),
	 * matching the bindings emitted by build_buffer_copy_shader. */
	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&copy_ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.buffer.copy_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	/* The fill pipeline takes 4 bytes of push constants: the fill value. */
	VkPipelineLayoutCreateInfo fill_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &fill_pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.buffer.fill_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* The copy pipeline needs no push constants. */
	VkPipelineLayoutCreateInfo copy_pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
		.pushConstantRangeCount = 0,
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &copy_pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.buffer.copy_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&fill_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = fill_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.fill_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &fill_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.fill_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&copy_cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = copy_pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.buffer.copy_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &copy_vk_pipeline_info, NULL,
					     &device->meta_state.buffer.copy_pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	/* The NIR shaders were only inputs to pipeline creation; free
	 * them on both the success and failure paths. */
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return VK_SUCCESS;
fail:
	radv_device_finish_meta_buffer_state(device);
	ralloc_free(fill_cs.nir);
	ralloc_free(copy_cs.nir);
	return result;
}
    259 
    260 void radv_device_finish_meta_buffer_state(struct radv_device *device)
    261 {
    262 	if (device->meta_state.buffer.copy_pipeline)
    263 		radv_DestroyPipeline(radv_device_to_handle(device),
    264 				     device->meta_state.buffer.copy_pipeline,
    265 				     &device->meta_state.alloc);
    266 
    267 	if (device->meta_state.buffer.fill_pipeline)
    268 		radv_DestroyPipeline(radv_device_to_handle(device),
    269 				     device->meta_state.buffer.fill_pipeline,
    270 				     &device->meta_state.alloc);
    271 
    272 	if (device->meta_state.buffer.copy_p_layout)
    273 		radv_DestroyPipelineLayout(radv_device_to_handle(device),
    274 					   device->meta_state.buffer.copy_p_layout,
    275 					   &device->meta_state.alloc);
    276 
    277 	if (device->meta_state.buffer.fill_p_layout)
    278 		radv_DestroyPipelineLayout(radv_device_to_handle(device),
    279 					   device->meta_state.buffer.fill_p_layout,
    280 					   &device->meta_state.alloc);
    281 
    282 	if (device->meta_state.buffer.copy_ds_layout)
    283 		radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
    284 						device->meta_state.buffer.copy_ds_layout,
    285 						&device->meta_state.alloc);
    286 
    287 	if (device->meta_state.buffer.fill_ds_layout)
    288 		radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
    289 						device->meta_state.buffer.fill_ds_layout,
    290 						&device->meta_state.alloc);
    291 }
    292 
    293 static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
    294 			       struct radeon_winsys_bo *bo,
    295 			       uint64_t offset, uint64_t size, uint32_t value)
    296 {
    297 	struct radv_device *device = cmd_buffer->device;
    298 	uint64_t block_count = round_up_u64(size, 1024);
    299 	struct radv_meta_saved_compute_state saved_state;
    300 	VkDescriptorSet ds;
    301 
    302 	radv_meta_save_compute(&saved_state, cmd_buffer, 4);
    303 
    304 	radv_temp_descriptor_set_create(device, cmd_buffer,
    305 					device->meta_state.buffer.fill_ds_layout,
    306 					&ds);
    307 
    308 	struct radv_buffer dst_buffer = {
    309 		.bo = bo,
    310 		.offset = offset,
    311 		.size = size
    312 	};
    313 
    314 	radv_UpdateDescriptorSets(radv_device_to_handle(device),
    315 				  1, /* writeCount */
    316 				  (VkWriteDescriptorSet[]) {
    317 					  {
    318 						  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
    319 						  .dstSet = ds,
    320 						  .dstBinding = 0,
    321 						  .dstArrayElement = 0,
    322 						  .descriptorCount = 1,
    323 						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    324 						  .pBufferInfo = &(VkDescriptorBufferInfo) {
    325 							.buffer = radv_buffer_to_handle(&dst_buffer),
    326 							.offset = 0,
    327 							.range = size
    328 						  }
    329 					  }
    330 				  }, 0, NULL);
    331 
    332 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
    333 			     VK_PIPELINE_BIND_POINT_COMPUTE,
    334 			     device->meta_state.buffer.fill_pipeline);
    335 
    336 	radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
    337 				   VK_PIPELINE_BIND_POINT_COMPUTE,
    338 				   device->meta_state.buffer.fill_p_layout, 0, 1,
    339 				   &ds, 0, NULL);
    340 
    341 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
    342 			      device->meta_state.buffer.fill_p_layout,
    343 			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
    344 			      &value);
    345 
    346 	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
    347 
    348 	radv_temp_descriptor_set_destroy(device, ds);
    349 
    350 	radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
    351 }
    352 
    353 static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
    354 			       struct radeon_winsys_bo *src_bo,
    355 			       struct radeon_winsys_bo *dst_bo,
    356 			       uint64_t src_offset, uint64_t dst_offset,
    357 			       uint64_t size)
    358 {
    359 	struct radv_device *device = cmd_buffer->device;
    360 	uint64_t block_count = round_up_u64(size, 1024);
    361 	struct radv_meta_saved_compute_state saved_state;
    362 	VkDescriptorSet ds;
    363 
    364 	radv_meta_save_compute(&saved_state, cmd_buffer, 0);
    365 
    366 	radv_temp_descriptor_set_create(device, cmd_buffer,
    367 					device->meta_state.buffer.copy_ds_layout,
    368 					&ds);
    369 
    370 	struct radv_buffer dst_buffer = {
    371 		.bo = dst_bo,
    372 		.offset = dst_offset,
    373 		.size = size
    374 	};
    375 
    376 	struct radv_buffer src_buffer = {
    377 		.bo = src_bo,
    378 		.offset = src_offset,
    379 		.size = size
    380 	};
    381 
    382 	radv_UpdateDescriptorSets(radv_device_to_handle(device),
    383 				  2, /* writeCount */
    384 				  (VkWriteDescriptorSet[]) {
    385 					  {
    386 						  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
    387 						  .dstSet = ds,
    388 						  .dstBinding = 0,
    389 						  .dstArrayElement = 0,
    390 						  .descriptorCount = 1,
    391 						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    392 						  .pBufferInfo = &(VkDescriptorBufferInfo) {
    393 							.buffer = radv_buffer_to_handle(&dst_buffer),
    394 							.offset = 0,
    395 							.range = size
    396 						  }
    397 					  },
    398 					  {
    399 						  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
    400 						  .dstSet = ds,
    401 						  .dstBinding = 1,
    402 						  .dstArrayElement = 0,
    403 						  .descriptorCount = 1,
    404 						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    405 						  .pBufferInfo = &(VkDescriptorBufferInfo) {
    406 							.buffer = radv_buffer_to_handle(&src_buffer),
    407 							.offset = 0,
    408 							.range = size
    409 						  }
    410 					  }
    411 				  }, 0, NULL);
    412 
    413 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
    414 			     VK_PIPELINE_BIND_POINT_COMPUTE,
    415 			     device->meta_state.buffer.copy_pipeline);
    416 
    417 	radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
    418 				   VK_PIPELINE_BIND_POINT_COMPUTE,
    419 				   device->meta_state.buffer.copy_p_layout, 0, 1,
    420 				   &ds, 0, NULL);
    421 
    422 
    423 	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
    424 
    425 	radv_temp_descriptor_set_destroy(device, ds);
    426 
    427 	radv_meta_restore_compute(&saved_state, cmd_buffer, 0);
    428 }
    429 
    430 
    431 void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
    432 		      struct radeon_winsys_bo *bo,
    433 		      uint64_t offset, uint64_t size, uint32_t value)
    434 {
    435 	assert(!(offset & 3));
    436 	assert(!(size & 3));
    437 
    438 	if (size >= 4096)
    439 		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
    440 	else if (size) {
    441 		uint64_t va = cmd_buffer->device->ws->buffer_get_va(bo);
    442 		va += offset;
    443 		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, bo, 8);
    444 		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
    445 	}
    446 }
    447 
    448 static
    449 void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
    450 		      struct radeon_winsys_bo *src_bo,
    451 		      struct radeon_winsys_bo *dst_bo,
    452 		      uint64_t src_offset, uint64_t dst_offset,
    453 		      uint64_t size)
    454 {
    455 	if (size >= 4096 && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
    456 		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
    457 				   src_offset, dst_offset, size);
    458 	else if (size) {
    459 		uint64_t src_va = cmd_buffer->device->ws->buffer_get_va(src_bo);
    460 		uint64_t dst_va = cmd_buffer->device->ws->buffer_get_va(dst_bo);
    461 		src_va += src_offset;
    462 		dst_va += dst_offset;
    463 
    464 		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, src_bo, 8);
    465 		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_bo, 8);
    466 
    467 		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
    468 	}
    469 }
    470 
    471 void radv_CmdFillBuffer(
    472     VkCommandBuffer                             commandBuffer,
    473     VkBuffer                                    dstBuffer,
    474     VkDeviceSize                                dstOffset,
    475     VkDeviceSize                                fillSize,
    476     uint32_t                                    data)
    477 {
    478 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
    479 	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
    480 
    481 	if (fillSize == VK_WHOLE_SIZE)
    482 		fillSize = (dst_buffer->size - dstOffset) & ~3ull;
    483 
    484 	radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
    485 			 fillSize, data);
    486 }
    487 
    488 void radv_CmdCopyBuffer(
    489 	VkCommandBuffer                             commandBuffer,
    490 	VkBuffer                                    srcBuffer,
    491 	VkBuffer                                    destBuffer,
    492 	uint32_t                                    regionCount,
    493 	const VkBufferCopy*                         pRegions)
    494 {
    495 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
    496 	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
    497 	RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);
    498 
    499 	for (unsigned r = 0; r < regionCount; r++) {
    500 		uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
    501 		uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
    502 		uint64_t copy_size = pRegions[r].size;
    503 
    504 		radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
    505 				 src_offset, dest_offset, copy_size);
    506 	}
    507 }
    508 
/* vkCmdUpdateBuffer: write @dataSize bytes from host memory into
 * @dstBuffer at @dstOffset.
 *
 * Small updates (< 4096 bytes) are emitted inline into the command
 * stream with a WRITE_DATA packet; larger ones are staged through the
 * command buffer's upload BO and copied on the GPU.
 */
void radv_CmdUpdateBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                dataSize,
	const void*                                 pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	uint64_t words = dataSize / 4;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	/* The packet writes whole dwords to a dword-aligned address. */
	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (dataSize < 4096) {
		si_emit_cache_flush(cmd_buffer);

		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);

		/* 4 dwords of header + address, plus the data payload. */
		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		/* NOTE(review): the destination select differs depending on
		 * whether this queue uses the MEC (compute) microcode. */
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
		                                V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
		                            S_370_WR_CONFIRM(1) |
		                            S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);
	} else {
		/* Stage the data in the upload BO, then copy on the GPU. */
		uint32_t buf_offset;
		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}
    548