/*
 * Copyright 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

static bool
lookup_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_shader_bin *bin =
      anv_pipeline_cache_search(&device->blorp_shader_cache, key, key_size);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static void
upload_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const struct brw_stage_prog_data *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_pipeline_cache_upload_kernel(&device->blorp_shader_cache,
                                       key, key_size, kernel, kernel_size,
                                       prog_data, prog_data_size, &bind_map);

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
}
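
/* blorp compiles its blit/clear/resolve kernels on demand and calls back
 * into the driver to cache them.  The two callbacks above implement that
 * contract on top of anv's pipeline cache: lookup_blorp_shader returns a
 * previously-uploaded kernel by key, and upload_blorp_shader inserts a
 * freshly-compiled one with an empty bind map.
 */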

void
anv_device_init_blorp(struct anv_device *device)
{
   anv_pipeline_cache_init(&device->blorp_shader_cache, device, true);
   blorp_init(&device->blorp, device, &device->isl_dev);
   device->blorp.compiler = device->instance->physicalDevice.compiler;
   device->blorp.mocs.tex = device->default_mocs;
   device->blorp.mocs.rb = device->default_mocs;
   device->blorp.mocs.vb = device->default_mocs;
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
   switch (device->info.gen) {
   case 7:
      if (device->info.is_haswell) {
         device->blorp.exec = gen75_blorp_exec;
      } else {
         device->blorp.exec = gen7_blorp_exec;
      }
      break;
   case 8:
      device->blorp.exec = gen8_blorp_exec;
      break;
   case 9:
      device->blorp.exec = gen9_blorp_exec;
      break;
   default:
      unreachable("Unknown hardware generation");
   }
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
   anv_pipeline_cache_finish(&device->blorp_shader_cache);
}

static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   const struct isl_format_layout *fmtl =
      isl_format_get_layout(format);

   /* ASTC is the only format which doesn't support linear layouts.
    * Create an equivalently sized surface with ISL to get around this.
    */
   if (fmtl->txc == ISL_TXC_ASTC) {
      /* Use an equivalently sized format */
      format = ISL_FORMAT_R32G32B32A32_UINT;
      assert(fmtl->bpb == isl_format_get_layout(format)->bpb);

      /* Shrink the dimensions for the new format */
      width = DIV_ROUND_UP(width, fmtl->bw);
      height = DIV_ROUND_UP(height, fmtl->bh);
   }

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->bo,
         .offset = buffer->offset + offset,
      },
   };

   isl_surf_init(&device->isl_dev, isl_surf,
                 .dim = ISL_SURF_DIM_2D,
                 .format = format,
                 .width = width,
                 .height = height,
                 .depth = 1,
                 .levels = 1,
                 .array_len = 1,
                 .samples = 1,
                 .min_pitch = row_pitch,
                 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                          ISL_SURF_USAGE_RENDER_TARGET_BIT,
                 .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(isl_surf->row_pitch == row_pitch);
}

static void
get_blorp_surf_for_anv_image(const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT ||
       aux_usage == ISL_AUX_USAGE_HIZ)
      aux_usage = ISL_AUX_USAGE_NONE;

   const struct anv_surface *surface =
      anv_image_get_surface_for_aspect_mask(image, aspect);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = image->bo,
         .offset = image->offset + surface->offset,
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      blorp_surf->aux_surf = &image->aux_surface.isl;
      blorp_surf->aux_addr = (struct blorp_address) {
         .buffer = image->bo,
         .offset = image->offset + image->aux_surface.offset,
      };
      blorp_surf->aux_usage = aux_usage;
   }
}
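
/* vkCmdCopyImage treats the z dimension of a 3D image and the array layers
 * of a 2D array image interchangeably: for 3D images the base "layer" comes
 * from the region's z offset and the layer count from its depth, while for
 * array images both come from the subresource.  Each layer is then copied
 * with its own blorp_copy.
 */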

void anv_CmdCopyImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageCopy*                          pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      VkOffset3D srcOffset =
         anv_sanitize_image_offset(src_image->type, pRegions[r].srcOffset);
      VkOffset3D dstOffset =
         anv_sanitize_image_offset(dst_image->type, pRegions[r].dstOffset);
      VkExtent3D extent =
         anv_sanitize_image_extent(src_image->type, pRegions[r].extent);

      unsigned dst_base_layer, layer_count;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         dst_base_layer = pRegions[r].dstOffset.z;
         layer_count = pRegions[r].extent.depth;
      } else {
         dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer;
         layer_count =
            anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);
      }

      unsigned src_base_layer;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         src_base_layer = pRegions[r].srcOffset.z;
      } else {
         src_base_layer = pRegions[r].srcSubresource.baseArrayLayer;
         assert(layer_count ==
                anv_get_layerCount(src_image, &pRegions[r].srcSubresource));
      }

      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);

      uint32_t a;
      for_each_bit(a, pRegions[r].dstSubresource.aspectMask) {
         VkImageAspectFlagBits aspect = (1 << a);

         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(src_image, aspect, src_image->aux_usage,
                                      &src_surf);
         get_blorp_surf_for_anv_image(dst_image, aspect, dst_image->aux_usage,
                                      &dst_surf);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(&batch, &src_surf, pRegions[r].srcSubresource.mipLevel,
                       src_base_layer + i,
                       &dst_surf, pRegions[r].dstSubresource.mipLevel,
                       dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }

   blorp_batch_finish(&batch);
}
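
/* Shared implementation of vkCmdCopyBufferToImage and
 * vkCmdCopyImageToBuffer.  The buffer is wrapped in a linear 2D surface of
 * the image's linear format, and the src/dst pointers are simply swapped
 * depending on the copy direction so that one loop handles both.
 */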

static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     uint32_t regionCount,
                     const VkBufferImageCopy* pRegions,
                     bool buffer_to_image)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;

      get_blorp_surf_for_anv_image(anv_image, aspect, anv_image->aux_usage,
                                   &image.surf);
      image.offset =
         anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset);
      image.level = pRegions[r].imageSubresource.mipLevel;

      VkExtent3D extent =
         anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent);
      if (anv_image->type != VK_IMAGE_TYPE_3D) {
         image.offset.z = pRegions[r].imageSubresource.baseArrayLayer;
         extent.depth =
            anv_get_layerCount(anv_image, &pRegions[r].imageSubresource);
      }

      const enum isl_format buffer_format =
         anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format,
                            aspect, VK_IMAGE_TILING_LINEAR);

      const VkExtent3D bufferImageExtent = {
         .width  = pRegions[r].bufferRowLength ?
                   pRegions[r].bufferRowLength : extent.width,
         .height = pRegions[r].bufferImageHeight ?
                   pRegions[r].bufferImageHeight : extent.height,
      };

      const struct isl_format_layout *buffer_fmtl =
         isl_format_get_layout(buffer_format);

      const uint32_t buffer_row_pitch =
         DIV_ROUND_UP(bufferImageExtent.width, buffer_fmtl->bw) *
         (buffer_fmtl->bpb / 8);

      const uint32_t buffer_layer_stride =
         DIV_ROUND_UP(bufferImageExtent.height, buffer_fmtl->bh) *
         buffer_row_pitch;

      struct isl_surf buffer_isl_surf;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    anv_buffer, pRegions[r].bufferOffset,
                                    extent.width, extent.height,
                                    buffer_row_pitch, buffer_format,
                                    &buffer.surf, &buffer_isl_surf);

      for (unsigned z = 0; z < extent.depth; z++) {
         blorp_copy(&batch, &src->surf, src->level, src->offset.z,
                    &dst->surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                    extent.width, extent.height);

         image.offset.z++;
         buffer.surf.addr.offset += buffer_layer_stride;
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdCopyBufferToImage(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
                        regionCount, pRegions, true);
}

void anv_CmdCopyImageToBuffer(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   copy_buffer_to_image(cmd_buffer, dst_buffer, src_image,
                        regionCount, pRegions, false);
}
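
/* Vulkan allows VkImageBlit regions to be specified with flipped
 * coordinates (offset[1] < offset[0]).  Normalize both ranges to be
 * increasing and report whether the net effect is a mirrored blit; blorp
 * takes the flip as an explicit parameter.
 */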

static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

void anv_CmdBlitImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageBlit*                          pRegions,
    VkFilter                                    filter)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_surf src, dst;

   uint32_t gl_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      gl_filter = 0x2600; /* GL_NEAREST */
      break;
   case VK_FILTER_LINEAR:
      gl_filter = 0x2601; /* GL_LINEAR */
      break;
   default:
      unreachable("Invalid filter");
   }

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
      const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;

      get_blorp_surf_for_anv_image(src_image, src_res->aspectMask,
                                   src_image->aux_usage, &src);
      get_blorp_surf_for_anv_image(dst_image, dst_res->aspectMask,
                                   dst_image->aux_usage, &dst);

      struct anv_format src_format =
         anv_get_format(&cmd_buffer->device->info, src_image->vk_format,
                        src_res->aspectMask, src_image->tiling);
      struct anv_format dst_format =
         anv_get_format(&cmd_buffer->device->info, dst_image->vk_format,
                        dst_res->aspectMask, dst_image->tiling);

      unsigned dst_start, dst_end;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = pRegions[r].dstOffsets[0].z;
         dst_end = pRegions[r].dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start + anv_get_layerCount(dst_image, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = pRegions[r].srcOffsets[0].z;
         src_end = pRegions[r].srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start + anv_get_layerCount(src_image, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      float src_z_step = (float)(src_end + 1 - src_start) /
                         (float)(dst_end + 1 - dst_start);

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
      }

      unsigned src_x0 = pRegions[r].srcOffsets[0].x;
      unsigned src_x1 = pRegions[r].srcOffsets[1].x;
      unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
      unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = pRegions[r].srcOffsets[0].y;
      unsigned src_y1 = pRegions[r].srcOffsets[1].y;
      unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
      unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      const unsigned num_layers = dst_end - dst_start;
      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         unsigned src_z = src_start + i * src_z_step;

         blorp_blit(&batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format, dst_format.swizzle,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    gl_filter, flip_x, flip_y);
      }
   }

   blorp_batch_finish(&batch);
}
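
/* Raw buffer copies and fills don't care what the data actually is, only
 * how big each texel is, so each power-of-two block size is mapped to an
 * arbitrary UINT format of that size.
 */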

static enum isl_format
isl_format_for_size(unsigned size_B)
{
   switch (size_B) {
   case 1:  return ISL_FORMAT_R8_UINT;
   case 2:  return ISL_FORMAT_R8G8_UINT;
   case 4:  return ISL_FORMAT_R8G8B8A8_UINT;
   case 8:  return ISL_FORMAT_R16G16B16A16_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Not a power-of-two format size");
   }
}

static void
do_buffer_copy(struct blorp_batch *batch,
               struct anv_bo *src, uint64_t src_offset,
               struct anv_bo *dst, uint64_t dst_offset,
               int width, int height, int block_size)
{
   struct anv_device *device = batch->blorp->driver_ctx;

   /* The actual format we pick doesn't matter as blorp will throw it away.
    * The only thing that actually matters is the size.
    */
   enum isl_format format = isl_format_for_size(block_size);

   struct isl_surf surf;
   isl_surf_init(&device->isl_dev, &surf,
                 .dim = ISL_SURF_DIM_2D,
                 .format = format,
                 .width = width,
                 .height = height,
                 .depth = 1,
                 .levels = 1,
                 .array_len = 1,
                 .samples = 1,
                 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                          ISL_SURF_USAGE_RENDER_TARGET_BIT,
                 .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(surf.row_pitch == width * block_size);

   struct blorp_surf src_blorp_surf = {
      .surf = &surf,
      .addr = {
         .buffer = src,
         .offset = src_offset,
      },
   };

   struct blorp_surf dst_blorp_surf = {
      .surf = &surf,
      .addr = {
         .buffer = dst,
         .offset = dst_offset,
      },
   };

   blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
              0, 0, 0, 0, width, height);
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
static inline uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case MIN2() will take the other one.  If both are 0 then we will hit
    * the assert above.
    */
   return 1ull << MIN2(a_log2, b_log2);
}
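
/* For example, gcd_pow2_u64(16, 1000) = 8: 1000 = 8 * 125, so its lowest
 * set bit is 8, which is smaller than 16's.  Chaining this over a copy's
 * source offset, destination offset, and size yields the largest
 * power-of-two texel size at which every address involved stays aligned.
 */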

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

void anv_CmdCopyBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferCopy*                         pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
      uint64_t dst_offset = dst_buffer->offset + pRegions[r].dstOffset;
      uint64_t copy_size = pRegions[r].size;

      /* First, we compute the biggest format that can be used with the
       * given offsets and size.
       */
      int bs = 16;
      bs = gcd_pow2_u64(bs, src_offset);
      bs = gcd_pow2_u64(bs, dst_offset);
      bs = gcd_pow2_u64(bs, pRegions[r].size);

      /* Next, we make a bunch of max-sized copies */
      uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
      while (copy_size >= max_copy_size) {
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
         copy_size -= max_copy_size;
         src_offset += max_copy_size;
         dst_offset += max_copy_size;
      }

      /* Now make a max-width copy */
      uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);
      assert(height < MAX_SURFACE_DIM);
      if (height != 0) {
         uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        MAX_SURFACE_DIM, height, bs);
         copy_size -= rect_copy_size;
         src_offset += rect_copy_size;
         dst_offset += rect_copy_size;
      }

      /* Finally, make a small copy to finish it off */
      if (copy_size != 0) {
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        copy_size / bs, 1, bs);
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_block_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      int bs = 16;
      bs = gcd_pow2_u64(bs, dstOffset);
      bs = gcd_pow2_u64(bs, copy_size);

      do_buffer_copy(&batch,
                     &cmd_buffer->device->dynamic_state_block_pool.bo,
                     tmp_data.offset,
                     dst_buffer->bo, dst_buffer->offset + dstOffset,
                     copy_size / bs, 1, bs);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   blorp_batch_finish(&batch);
}
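
/* Fills follow the same chunking scheme as vkCmdCopyBuffer, but with
 * blorp_clear on a linear surface instead of blorp_copy: max-sized
 * rectangles first, then one max-width rectangle, then a single row for
 * the remainder.
 */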

void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   if (fillSize == VK_WHOLE_SIZE) {
      fillSize = dst_buffer->size - dstOffset;
      /* Make sure fillSize is a multiple of 4 */
      fillSize &= ~3ull;
   }

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, NULL);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, NULL);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, NULL);
   }

   blorp_batch_finish(&batch);
}

void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   static const bool color_write_disable[4] = { false, false, false, false };

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                image->aux_usage, &surf);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);

      struct anv_format src_format =
         anv_get_format(&cmd_buffer->device->info, image->vk_format,
                        VK_IMAGE_ASPECT_COLOR_BIT, image->tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->extent.depth, level);
         }

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), color_write_disable);
      }
   }

   blorp_batch_finish(&batch);
}
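
/* Depth/stencil image clears build their blorp surfaces with
 * ISL_AUX_USAGE_NONE: get_blorp_surf_for_anv_image strips HiZ usage anyway,
 * and HiZ-accelerated clears are handled separately in the render-pass path
 * (see anv_cmd_buffer_clear_subpass below).
 */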

void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf depth, stencil;
   if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   ISL_AUX_USAGE_NONE, &stencil);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);
      }
   }

   blorp_batch_finish(&batch);
}

struct anv_state
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset)
{
   struct anv_state bt_state =
      anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                         state_offset);
   if (bt_state.map == NULL) {
      /* We ran out of space.  Grab a new binding table block. */
      MAYBE_UNUSED VkResult result =
         anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      assert(result == VK_SUCCESS);

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                    state_offset);
      assert(bt_state.map != NULL);
   }

   return bt_state;
}

static uint32_t
binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state surface_state)
{
   uint32_t state_offset;
   struct anv_state bt_state =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset);

   uint32_t *bt_map = bt_state.map;
   bt_map[0] = surface_state.offset + state_offset;

   return bt_state.offset;
}
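
/* vkCmdClearAttachments runs inside a render pass, so rather than building
 * fresh surfaces these helpers point blorp_clear_attachments at a one-entry
 * binding table referencing surface state already emitted for the current
 * subpass (the attachment's render-target state, or the null surface state
 * for depth/stencil).
 */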

static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t color_att = attachment->colorAttachment;
   const uint32_t att_idx = subpass->color_attachments[color_att];

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att_idx];

   uint32_t binding_table =
      binding_table_for_surface_state(cmd_buffer, att_state->color_rt_state);

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}

static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   static const union isl_color_value color_value = { .u32 = { 0, } };
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t att_idx = subpass->depth_stencil_attachment;

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (clear_depth) {
      depth_format = anv_get_isl_format(&cmd_buffer->device->info,
                                        pass_att->format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   uint32_t binding_table =
      binding_table_for_surface_state(cmd_buffer,
                                      cmd_buffer->state.null_surface_state);

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      blorp_clear_attachments(batch, binding_table,
                              depth_format, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}

void anv_CmdClearAttachments(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    attachmentCount,
    const VkClearAttachment*                    pAttachments,
    uint32_t                                    rectCount,
    const VkClearRect*                          pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   for (uint32_t a = 0; a < attachmentCount; ++a) {
      if (pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         clear_color_attachment(cmd_buffer, &batch,
                                &pAttachments[a],
                                rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &pAttachments[a],
                                        rectCount, pRects);
      }
   }

   blorp_batch_finish(&batch);
}

enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};
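
/* Decide whether an attachment needs a render-target/depth cache flush and
 * texture-cache invalidate at the given stage of the current subpass:
 * either it is read within this subpass (input or resolve source), or a
 * later subpass reads it as input before anything draws to it again.
 */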

static bool
attachment_needs_flush(struct anv_cmd_buffer *cmd_buffer,
                       struct anv_render_pass_attachment *att,
                       enum subpass_stage stage)
{
   struct anv_render_pass *pass = cmd_buffer->state.pass;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   unsigned subpass_idx = subpass - pass->subpasses;
   assert(subpass_idx < pass->subpass_count);

   /* We handle this subpass specially based on the current stage */
   enum anv_subpass_usage usage = att->subpass_usage[subpass_idx];
   switch (stage) {
   case SUBPASS_STAGE_LOAD:
      if (usage & (ANV_SUBPASS_USAGE_INPUT | ANV_SUBPASS_USAGE_RESOLVE_SRC))
         return true;
      break;

   case SUBPASS_STAGE_DRAW:
      if (usage & ANV_SUBPASS_USAGE_RESOLVE_SRC)
         return true;
      break;

   default:
      break;
   }

   for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
      usage = att->subpass_usage[s];

      /* If this attachment is going to be used as an input in this or any
       * future subpass, then we need to flush its cache and invalidate the
       * texture cache.
       */
      if (att->subpass_usage[s] & ANV_SUBPASS_USAGE_INPUT)
         return true;

      if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
         /* We found another subpass that draws to this attachment.  We'll
          * wait to resolve until then.
          */
         return false;
      }
   }

   return false;
}

static void
anv_cmd_buffer_flush_attachments(struct anv_cmd_buffer *cmd_buffer,
                                 enum subpass_stage stage)
{
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_render_pass *pass = cmd_buffer->state.pass;

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      uint32_t att = subpass->color_attachments[i];
      assert(att < pass->attachment_count);
      if (attachment_needs_flush(cmd_buffer, &pass->attachments[att], stage)) {
         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
      }
   }

   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      uint32_t att = subpass->depth_stencil_attachment;
      assert(att < pass->attachment_count);
      if (attachment_needs_flush(cmd_buffer, &pass->attachments[att], stage)) {
         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
            ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
      }
   }
}

static bool
subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   uint32_t ds = cmd_state->subpass->depth_stencil_attachment;

   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      uint32_t a = cmd_state->subpass->color_attachments[i];
      if (cmd_state->attachments[a].pending_clear_aspects) {
         return true;
      }
   }

   if (ds != VK_ATTACHMENT_UNUSED &&
       cmd_state->attachments[ds].pending_clear_aspects) {
      return true;
   }

   return false;
}

void
anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   const VkRect2D render_area = cmd_buffer->state.render_area;

   if (!subpass_needs_clear(cmd_buffer))
      return;

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   VkClearRect clear_rect = {
      .rect = cmd_buffer->state.render_area,
      .baseArrayLayer = 0,
      .layerCount = cmd_buffer->state.framebuffer->layers,
   };

   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      const uint32_t a = cmd_state->subpass->color_attachments[i];
      struct anv_attachment_state *att_state = &cmd_state->attachments[a];

      if (!att_state->pending_clear_aspects)
         continue;

      assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);

      struct anv_image_view *iview = fb->attachments[a];
      const struct anv_image *image = iview->image;
      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                   att_state->aux_usage, &surf);

      if (att_state->fast_clear) {
         surf.clear_color = vk_to_isl_color(att_state->clear_value.color);

         /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
          *
          *    "After Render target fast clear, pipe-control with color cache
          *    write-flush must be issued before sending any DRAW commands on
          *    that render target."
          *
          * This comment is a bit cryptic and doesn't really tell you what's
          * going on or what's really needed.  It appears that fast clear ops
          * are not properly synchronized with other drawing.  This means that
          * we cannot have a fast clear operation in the pipe at the same time
          * as other regular drawing operations.  We need to use a
          * PIPE_CONTROL to ensure that the contents of the previous draw hit
          * the render target before we resolve and then use a second
          * PIPE_CONTROL after the resolve to ensure that it is completed
          * before any additional drawing occurs.
          */
         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

         blorp_fast_clear(&batch, &surf, iview->isl.format,
                          iview->isl.base_level,
                          iview->isl.base_array_layer, fb->layers,
                          render_area.offset.x, render_area.offset.y,
                          render_area.offset.x + render_area.extent.width,
                          render_area.offset.y + render_area.extent.height);

         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
      } else {
         blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle,
                     iview->isl.base_level,
                     iview->isl.base_array_layer, fb->layers,
                     render_area.offset.x, render_area.offset.y,
                     render_area.offset.x + render_area.extent.width,
                     render_area.offset.y + render_area.extent.height,
                     vk_to_isl_color(att_state->clear_value.color), NULL);
      }

      att_state->pending_clear_aspects = 0;
   }

   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;

   if (ds != VK_ATTACHMENT_UNUSED &&
       cmd_state->attachments[ds].pending_clear_aspects) {

      VkClearAttachment clear_att = {
         .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
         .clearValue = cmd_state->attachments[ds].clear_value,
      };

      const uint8_t gen = cmd_buffer->device->info.gen;
      bool clear_with_hiz = gen >= 8 && cmd_state->attachments[ds].aux_usage ==
                            ISL_AUX_USAGE_HIZ;
      const struct anv_image_view *iview = fb->attachments[ds];

      if (clear_with_hiz) {
         const bool clear_depth = clear_att.aspectMask &
                                  VK_IMAGE_ASPECT_DEPTH_BIT;
         const bool clear_stencil = clear_att.aspectMask &
                                    VK_IMAGE_ASPECT_STENCIL_BIT;

         /* Check against restrictions for depth buffer clearing.  A great GPU
          * performance benefit isn't expected when using the HZ sequence for
          * stencil-only clears.  Therefore, we don't emit a HZ op sequence
          * for a stencil clear in addition to using the BLORP-fallback for
          * depth.
          */
         if (clear_depth) {
            if (!blorp_can_hiz_clear_depth(gen, iview->isl.format,
                                           iview->image->samples,
                                           render_area.offset.x,
                                           render_area.offset.y,
                                           render_area.offset.x +
                                           render_area.extent.width,
                                           render_area.offset.y +
                                           render_area.extent.height)) {
               clear_with_hiz = false;
            } else if (clear_att.clearValue.depthStencil.depth !=
                       ANV_HZ_FC_VAL) {
               /* Don't enable fast depth clears for any color not equal to
                * ANV_HZ_FC_VAL.
                */
               clear_with_hiz = false;
            } else if (gen == 8 &&
                       anv_can_sample_with_hiz(cmd_buffer->device->info.gen,
                                               iview->image->samples)) {
               /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
                * fast-cleared portion of a HiZ buffer.  Testing has revealed
                * that Gen8 only supports returning 0.0f.  Gens prior to gen8
                * do not support this feature at all.
                */
               clear_with_hiz = false;
            }
         }

         if (clear_with_hiz) {
            blorp_gen8_hiz_clear_attachments(&batch, iview->image->samples,
                                             render_area.offset.x,
                                             render_area.offset.y,
                                             render_area.offset.x +
                                             render_area.extent.width,
                                             render_area.offset.y +
                                             render_area.extent.height,
                                             clear_depth, clear_stencil,
                                             clear_att.clearValue.
                                                depthStencil.stencil);
         }
      }

      if (!clear_with_hiz) {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &clear_att, 1, &clear_rect);
      }

      cmd_state->attachments[ds].pending_clear_aspects = 0;
   }

   blorp_batch_finish(&batch);

   anv_cmd_buffer_flush_attachments(cmd_buffer, SUBPASS_STAGE_LOAD);
}
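
/* Resolve one layer of a multisampled 2D image into a single-sampled one.
 * There is no dedicated resolve entrypoint used here; a 1:1 blorp_blit with
 * GL_NEAREST filtering from an MSAA source acts as the resolve, with blorp
 * combining the samples internally.
 */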

static void
resolve_image(struct blorp_batch *batch,
              const struct anv_image *src_image,
              uint32_t src_level, uint32_t src_layer,
              const struct anv_image *dst_image,
              uint32_t dst_level, uint32_t dst_layer,
              VkImageAspectFlags aspect_mask,
              uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
              uint32_t width, uint32_t height)
{
   assert(src_image->type == VK_IMAGE_TYPE_2D);
   assert(src_image->samples > 1);
   assert(dst_image->type == VK_IMAGE_TYPE_2D);
   assert(dst_image->samples == 1);

   uint32_t a;
   for_each_bit(a, aspect_mask) {
      VkImageAspectFlagBits aspect = 1 << a;

      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(src_image, aspect,
                                   src_image->aux_usage, &src_surf);
      get_blorp_surf_for_anv_image(dst_image, aspect,
                                   dst_image->aux_usage, &dst_surf);

      blorp_blit(batch,
                 &src_surf, src_level, src_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 0x2600 /* GL_NEAREST */, false, false);
   }
}

void anv_CmdResolveImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageResolve*                       pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (uint32_t r = 0; r < regionCount; r++) {
      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);
      assert(anv_get_layerCount(src_image, &pRegions[r].srcSubresource) ==
             anv_get_layerCount(dst_image, &pRegions[r].dstSubresource));

      const uint32_t layer_count =
         anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         resolve_image(&batch,
                       src_image, pRegions[r].srcSubresource.mipLevel,
                       pRegions[r].srcSubresource.baseArrayLayer + layer,
                       dst_image, pRegions[r].dstSubresource.mipLevel,
                       pRegions[r].dstSubresource.baseArrayLayer + layer,
                       pRegions[r].dstSubresource.aspectMask,
                       pRegions[r].srcOffset.x, pRegions[r].srcOffset.y,
                       pRegions[r].dstOffset.x, pRegions[r].dstOffset.y,
                       pRegions[r].extent.width, pRegions[r].extent.height);
      }
   }

   blorp_batch_finish(&batch);
}
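
/* Emit a CCS resolve for one color attachment at the end of a subpass if
 * one is required.  The decision depends on how the attachment is used
 * later: no resolve if a later subpass draws to it again, a full resolve if
 * something outside the render pass may read it without understanding CCS,
 * and a partial resolve when only a non-trivial clear color needs to be
 * folded back into the surface.
 */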

static void
ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       uint32_t att)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att];

   if (att_state->aux_usage == ISL_AUX_USAGE_NONE)
      return; /* Nothing to resolve */

   assert(att_state->aux_usage == ISL_AUX_USAGE_CCS_E ||
          att_state->aux_usage == ISL_AUX_USAGE_CCS_D);

   struct anv_render_pass *pass = cmd_buffer->state.pass;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   unsigned subpass_idx = subpass - pass->subpasses;
   assert(subpass_idx < pass->subpass_count);

   /* Scan forward to see what all ways this attachment will be used.
    * Ideally, we would like to resolve in the same subpass as the last write
    * of a particular attachment.  That way we only resolve once but it's
    * still hot in the cache.
    */
   bool found_draw = false;
   enum anv_subpass_usage usage = 0;
   for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
      usage |= pass->attachments[att].subpass_usage[s];

      if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
         /* We found another subpass that draws to this attachment.  We'll
          * wait to resolve until then.
          */
         found_draw = true;
         break;
      }
   }

   struct anv_image_view *iview = fb->attachments[att];
   const struct anv_image *image = iview->image;
   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);

   enum blorp_fast_clear_op resolve_op = BLORP_FAST_CLEAR_OP_NONE;
   if (!found_draw) {
      /* This is the last subpass that writes to this attachment so we need to
       * resolve here.  Ideally, we would like to only resolve if the storeOp
       * is set to VK_ATTACHMENT_STORE_OP_STORE.  However, we need to ensure
       * that the CCS bits are set to "resolved" because there may be copy or
       * blit operations (which may ignore CCS) between now and the next time
       * we render and we need to ensure that anything they write will be
       * respected in the next render.  Unfortunately, the hardware does not
       * provide us with any sort of "invalidate" pass that sets the CCS to
       * "resolved" without writing to the render target.
       */
      if (iview->image->aux_usage != ISL_AUX_USAGE_CCS_E) {
         /* The image destination surface doesn't support compression outside
          * the render pass.  We need a full resolve.
          */
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
      } else if (att_state->fast_clear) {
         /* We don't know what to do with clear colors outside the render
          * pass.  We need a partial resolve.  Only transparent black is
          * built into the surface state object and thus no resolve is
          * required for this case.
          */
         if (att_state->clear_value.color.uint32[0] ||
             att_state->clear_value.color.uint32[1] ||
             att_state->clear_value.color.uint32[2] ||
             att_state->clear_value.color.uint32[3])
            resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
      } else {
         /* The image "natively" supports all the compression we care about
          * and we don't need to resolve at all.  If this is the case, we
          * also don't need to resolve for any of the input attachment cases
          * below.
          */
      }
   } else if (usage & ANV_SUBPASS_USAGE_INPUT) {
      /* Input attachments are clear-color aware so, at least on Sky Lake, we
       * can frequently sample from them with no resolves at all.
       */
      if (att_state->aux_usage != att_state->input_aux_usage) {
         assert(att_state->input_aux_usage == ISL_AUX_USAGE_NONE);
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
      } else if (!att_state->clear_color_is_zero_one) {
         /* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color:
          *
          *    "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this RT
          *    is fast cleared with non-0/1 clear value, this RT must be
          *    partially resolved (refer to Partial Resolve operation) before
          *    binding this surface to Sampler."
          */
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
      }
   }

   if (resolve_op == BLORP_FAST_CLEAR_OP_NONE)
      return;

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                att_state->aux_usage, &surf);
   if (att_state->fast_clear)
      surf.clear_color = vk_to_isl_color(att_state->clear_value.color);

   /* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
    *
    *    "When performing a render target resolve, PIPE_CONTROL with end of
    *    pipe sync must be delivered."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  We need to use a
    * PIPE_CONTROL to ensure that the contents of the previous draw hit the
    * render target before we resolve and then use a second PIPE_CONTROL
    * after the resolve to ensure that it is completed before any additional
    * drawing occurs.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   for (uint32_t layer = 0; layer < fb->layers; layer++) {
      blorp_ccs_resolve(batch, &surf,
                        iview->isl.base_level,
                        iview->isl.base_array_layer + layer,
                        iview->isl.format, resolve_op);
   }

   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   /* Once we've done any sort of resolve, we're no longer fast-cleared */
   att_state->fast_clear = false;
   if (att_state->aux_usage == ISL_AUX_USAGE_CCS_D)
      att_state->aux_usage = ISL_AUX_USAGE_NONE;
}

void
anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      ccs_resolve_attachment(cmd_buffer, &batch,
                             subpass->color_attachments[i]);
   }

   anv_cmd_buffer_flush_attachments(cmd_buffer, SUBPASS_STAGE_DRAW);

   if (subpass->has_resolve) {
      for (uint32_t i = 0; i < subpass->color_count; ++i) {
         uint32_t src_att = subpass->color_attachments[i];
         uint32_t dst_att = subpass->resolve_attachments[i];

         if (dst_att == VK_ATTACHMENT_UNUSED)
            continue;

         if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) {
            /* From the Vulkan 1.0 spec:
             *
             *    If the first use of an attachment in a render pass is as a
             *    resolve attachment, then the loadOp is effectively ignored
             *    as the resolve is guaranteed to overwrite all pixels in the
             *    render area.
             */
            cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0;
         }

         struct anv_image_view *src_iview = fb->attachments[src_att];
         struct anv_image_view *dst_iview = fb->attachments[dst_att];

         const VkRect2D render_area = cmd_buffer->state.render_area;

         assert(src_iview->aspect_mask == dst_iview->aspect_mask);
         resolve_image(&batch, src_iview->image,
                       src_iview->isl.base_level,
                       src_iview->isl.base_array_layer,
                       dst_iview->image,
                       dst_iview->isl.base_level,
                       dst_iview->isl.base_array_layer,
                       src_iview->aspect_mask,
                       render_area.offset.x, render_area.offset.y,
                       render_area.offset.x, render_area.offset.y,
                       render_area.extent.width, render_area.extent.height);

         ccs_resolve_attachment(cmd_buffer, &batch, dst_att);
      }

      anv_cmd_buffer_flush_attachments(cmd_buffer, SUBPASS_STAGE_RESOLVE);
   }

   blorp_batch_finish(&batch);
}
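
/* get_blorp_surf_for_anv_image deliberately drops HiZ aux usage, so for an
 * explicit HiZ or depth resolve we attach the auxiliary HiZ surface to the
 * blorp surface by hand before emitting the op.
 */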

void
anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
                        const struct anv_image *image,
                        enum blorp_hiz_op op)
{
   assert(image);

   /* Don't resolve depth buffers without an auxiliary HiZ buffer and
    * don't perform such a resolve on gens that don't support it.
    */
   if (cmd_buffer->device->info.gen < 8 ||
       image->aux_usage != ISL_AUX_USAGE_HIZ)
      return;

   assert(op == BLORP_HIZ_OP_HIZ_RESOLVE ||
          op == BLORP_HIZ_OP_DEPTH_RESOLVE);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                ISL_AUX_USAGE_NONE, &surf);

   /* Manually add the aux HiZ surf */
   surf.aux_surf = &image->aux_surface.isl;
   surf.aux_addr = (struct blorp_address) {
      .buffer = image->bo,
      .offset = image->offset + image->aux_surface.offset,
   };
   surf.aux_usage = ISL_AUX_USAGE_HIZ;

   surf.clear_color.u32[0] = (uint32_t) ANV_HZ_FC_VAL;

   blorp_gen6_hiz_op(&batch, &surf, 0, 0, op);
   blorp_batch_finish(&batch);
}