1 /* 2 * Copyright 2016 Red Hat. 3 * Copyright 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 28 #ifndef RADV_PRIVATE_H 29 #define RADV_PRIVATE_H 30 31 #include <stdlib.h> 32 #include <stdio.h> 33 #include <stdbool.h> 34 #include <pthread.h> 35 #include <assert.h> 36 #include <stdint.h> 37 #include <string.h> 38 #ifdef HAVE_VALGRIND 39 #include <valgrind.h> 40 #include <memcheck.h> 41 #define VG(x) x 42 #else 43 #define VG(x) 44 #endif 45 46 #include <amdgpu.h> 47 #include "compiler/shader_enums.h" 48 #include "util/macros.h" 49 #include "util/list.h" 50 #include "main/macros.h" 51 #include "vk_alloc.h" 52 #include "vk_debug_report.h" 53 54 #include "radv_radeon_winsys.h" 55 #include "ac_binary.h" 56 #include "ac_nir_to_llvm.h" 57 #include "ac_gpu_info.h" 58 #include "ac_surface.h" 59 #include "radv_descriptor_set.h" 60 61 #include <llvm-c/TargetMachine.h> 62 63 /* Pre-declarations needed for WSI entrypoints */ 64 struct wl_surface; 65 struct wl_display; 66 typedef struct xcb_connection_t xcb_connection_t; 67 typedef uint32_t xcb_visualid_t; 68 typedef uint32_t xcb_window_t; 69 70 #include <vulkan/vulkan.h> 71 #include <vulkan/vulkan_intel.h> 72 #include <vulkan/vk_icd.h> 73 #include <vulkan/vk_android_native_buffer.h> 74 75 #include "radv_entrypoints.h" 76 77 #include "wsi_common.h" 78 79 #define ATI_VENDOR_ID 0x1002 80 81 #define MAX_VBS 32 82 #define MAX_VERTEX_ATTRIBS 32 83 #define MAX_RTS 8 84 #define MAX_VIEWPORTS 16 85 #define MAX_SCISSORS 16 86 #define MAX_DISCARD_RECTANGLES 4 87 #define MAX_PUSH_CONSTANTS_SIZE 128 88 #define MAX_PUSH_DESCRIPTORS 32 89 #define MAX_DYNAMIC_UNIFORM_BUFFERS 16 90 #define MAX_DYNAMIC_STORAGE_BUFFERS 8 91 #define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS) 92 #define MAX_SAMPLES_LOG2 4 93 #define NUM_META_FS_KEYS 13 94 #define RADV_MAX_DRM_DEVICES 8 95 #define MAX_VIEWS 8 96 97 #define NUM_DEPTH_CLEAR_PIPELINES 3 98 99 enum radv_mem_heap { 100 RADV_MEM_HEAP_VRAM, 101 RADV_MEM_HEAP_VRAM_CPU_ACCESS, 102 RADV_MEM_HEAP_GTT, 103 RADV_MEM_HEAP_COUNT 104 }; 105 106 enum radv_mem_type { 107 RADV_MEM_TYPE_VRAM, 108 RADV_MEM_TYPE_GTT_WRITE_COMBINE, 109 RADV_MEM_TYPE_VRAM_CPU_ACCESS, 110 RADV_MEM_TYPE_GTT_CACHED, 111 RADV_MEM_TYPE_COUNT 112 }; 113 114 #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) 115 116 static inline uint32_t 117 align_u32(uint32_t v, uint32_t a) 118 { 119 assert(a != 0 && a == (a & -a)); 120 return (v + a - 1) & ~(a - 1); 121 } 122 123 static inline uint32_t 124 align_u32_npot(uint32_t v, uint32_t a) 125 { 126 return (v + a - 1) / a * a; 127 } 128 129 static inline uint64_t 130 align_u64(uint64_t v, uint64_t a) 131 { 132 assert(a != 0 && a == (a & -a)); 133 return (v + a - 1) & ~(a - 1); 134 } 135 136 static inline int32_t 137 align_i32(int32_t v, int32_t a) 138 { 139 assert(a != 0 && a == (a & -a)); 140 return (v + a - 1) & ~(a - 1); 141 } 142 143 /** Alignment must be a power of 2. */ 144 static inline bool 145 radv_is_aligned(uintmax_t n, uintmax_t a) 146 { 147 assert(a == (a & -a)); 148 return (n & (a - 1)) == 0; 149 } 150 151 static inline uint32_t 152 round_up_u32(uint32_t v, uint32_t a) 153 { 154 return (v + a - 1) / a; 155 } 156 157 static inline uint64_t 158 round_up_u64(uint64_t v, uint64_t a) 159 { 160 return (v + a - 1) / a; 161 } 162 163 static inline uint32_t 164 radv_minify(uint32_t n, uint32_t levels) 165 { 166 if (unlikely(n == 0)) 167 return 0; 168 else 169 return MAX2(n >> levels, 1); 170 } 171 static inline float 172 radv_clamp_f(float f, float min, float max) 173 { 174 assert(min < max); 175 176 if (f > max) 177 return max; 178 else if (f < min) 179 return min; 180 else 181 return f; 182 } 183 184 static inline bool 185 radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) 186 { 187 if (*inout_mask & clear_mask) { 188 *inout_mask &= ~clear_mask; 189 return true; 190 } else { 191 return false; 192 } 193 } 194 195 #define for_each_bit(b, dword) \ 196 for (uint32_t __dword = (dword); \ 197 (b) = __builtin_ffs(__dword) - 1, __dword; \ 198 __dword &= ~(1 << (b))) 199 200 #define typed_memcpy(dest, src, count) ({ \ 201 STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \ 202 memcpy((dest), (src), (count) * sizeof(*(src))); \ 203 }) 204 205 /* Whenever we generate an error, pass it through this function. Useful for 206 * debugging, where we can break on it. Only call at error site, not when 207 * propagating errors. Might be useful to plug in a stack trace here. 208 */ 209 210 VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...); 211 212 #ifdef DEBUG 213 #define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL); 214 #define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__); 215 #else 216 #define vk_error(error) error 217 #define vk_errorf(error, format, ...) error 218 #endif 219 220 void __radv_finishme(const char *file, int line, const char *format, ...) 221 radv_printflike(3, 4); 222 void radv_loge(const char *format, ...) radv_printflike(1, 2); 223 void radv_loge_v(const char *format, va_list va); 224 225 /** 226 * Print a FINISHME message, including its source location. 227 */ 228 #define radv_finishme(format, ...) \ 229 do { \ 230 static bool reported = false; \ 231 if (!reported) { \ 232 __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \ 233 reported = true; \ 234 } \ 235 } while (0) 236 237 /* A non-fatal assert. Useful for debugging. */ 238 #ifdef DEBUG 239 #define radv_assert(x) ({ \ 240 if (unlikely(!(x))) \ 241 fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ 242 }) 243 #else 244 #define radv_assert(x) 245 #endif 246 247 #define stub_return(v) \ 248 do { \ 249 radv_finishme("stub %s", __func__); \ 250 return (v); \ 251 } while (0) 252 253 #define stub() \ 254 do { \ 255 radv_finishme("stub %s", __func__); \ 256 return; \ 257 } while (0) 258 259 void *radv_lookup_entrypoint(const char *name); 260 261 struct radv_physical_device { 262 VK_LOADER_DATA _loader_data; 263 264 struct radv_instance * instance; 265 266 struct radeon_winsys *ws; 267 struct radeon_info rad_info; 268 char path[20]; 269 char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; 270 uint8_t driver_uuid[VK_UUID_SIZE]; 271 uint8_t device_uuid[VK_UUID_SIZE]; 272 uint8_t cache_uuid[VK_UUID_SIZE]; 273 274 int local_fd; 275 struct wsi_device wsi_device; 276 277 bool has_rbplus; /* if RB+ register exist */ 278 bool rbplus_allowed; /* if RB+ is allowed */ 279 bool has_clear_state; 280 bool cpdma_prefetch_writes_memory; 281 bool has_scissor_bug; 282 283 /* This is the drivers on-disk cache used as a fallback as opposed to 284 * the pipeline cache defined by apps. 285 */ 286 struct disk_cache * disk_cache; 287 288 VkPhysicalDeviceMemoryProperties memory_properties; 289 enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT]; 290 }; 291 292 struct radv_instance { 293 VK_LOADER_DATA _loader_data; 294 295 VkAllocationCallbacks alloc; 296 297 uint32_t apiVersion; 298 int physicalDeviceCount; 299 struct radv_physical_device physicalDevices[RADV_MAX_DRM_DEVICES]; 300 301 uint64_t debug_flags; 302 uint64_t perftest_flags; 303 304 struct vk_debug_report_instance debug_report_callbacks; 305 }; 306 307 VkResult radv_init_wsi(struct radv_physical_device *physical_device); 308 void radv_finish_wsi(struct radv_physical_device *physical_device); 309 310 bool radv_instance_extension_supported(const char *name); 311 uint32_t radv_physical_device_api_version(struct radv_physical_device *dev); 312 bool radv_physical_device_extension_supported(struct radv_physical_device *dev, 313 const char *name); 314 315 struct cache_entry; 316 317 struct radv_pipeline_cache { 318 struct radv_device * device; 319 pthread_mutex_t mutex; 320 321 uint32_t total_size; 322 uint32_t table_size; 323 uint32_t kernel_count; 324 struct cache_entry ** hash_table; 325 bool modified; 326 327 VkAllocationCallbacks alloc; 328 }; 329 330 struct radv_pipeline_key { 331 uint32_t instance_rate_inputs; 332 unsigned tess_input_vertices; 333 uint32_t col_format; 334 uint32_t is_int8; 335 uint32_t is_int10; 336 uint8_t log2_ps_iter_samples; 337 uint8_t log2_num_samples; 338 uint32_t multisample : 1; 339 uint32_t has_multiview_view_index : 1; 340 }; 341 342 void 343 radv_pipeline_cache_init(struct radv_pipeline_cache *cache, 344 struct radv_device *device); 345 void 346 radv_pipeline_cache_finish(struct radv_pipeline_cache *cache); 347 void 348 radv_pipeline_cache_load(struct radv_pipeline_cache *cache, 349 const void *data, size_t size); 350 351 struct radv_shader_variant; 352 353 bool 354 radv_create_shader_variants_from_pipeline_cache(struct radv_device *device, 355 struct radv_pipeline_cache *cache, 356 const unsigned char *sha1, 357 struct radv_shader_variant **variants); 358 359 void 360 radv_pipeline_cache_insert_shaders(struct radv_device *device, 361 struct radv_pipeline_cache *cache, 362 const unsigned char *sha1, 363 struct radv_shader_variant **variants, 364 const void *const *codes, 365 const unsigned *code_sizes); 366 367 enum radv_blit_ds_layout { 368 RADV_BLIT_DS_LAYOUT_TILE_ENABLE, 369 RADV_BLIT_DS_LAYOUT_TILE_DISABLE, 370 RADV_BLIT_DS_LAYOUT_COUNT, 371 }; 372 373 static inline enum radv_blit_ds_layout radv_meta_blit_ds_to_type(VkImageLayout layout) 374 { 375 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE : RADV_BLIT_DS_LAYOUT_TILE_ENABLE; 376 } 377 378 static inline VkImageLayout radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout) 379 { 380 return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; 381 } 382 383 enum radv_meta_dst_layout { 384 RADV_META_DST_LAYOUT_GENERAL, 385 RADV_META_DST_LAYOUT_OPTIMAL, 386 RADV_META_DST_LAYOUT_COUNT, 387 }; 388 389 static inline enum radv_meta_dst_layout radv_meta_dst_layout_from_layout(VkImageLayout layout) 390 { 391 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL : RADV_META_DST_LAYOUT_OPTIMAL; 392 } 393 394 static inline VkImageLayout radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout) 395 { 396 return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; 397 } 398 399 struct radv_meta_state { 400 VkAllocationCallbacks alloc; 401 402 struct radv_pipeline_cache cache; 403 404 /** 405 * Use array element `i` for images with `2^i` samples. 406 */ 407 struct { 408 VkRenderPass render_pass[NUM_META_FS_KEYS]; 409 VkPipeline color_pipelines[NUM_META_FS_KEYS]; 410 411 VkRenderPass depthstencil_rp; 412 VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; 413 VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; 414 VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; 415 } clear[1 + MAX_SAMPLES_LOG2]; 416 417 VkPipelineLayout clear_color_p_layout; 418 VkPipelineLayout clear_depth_p_layout; 419 struct { 420 VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT]; 421 422 /** Pipeline that blits from a 1D image. */ 423 VkPipeline pipeline_1d_src[NUM_META_FS_KEYS]; 424 425 /** Pipeline that blits from a 2D image. */ 426 VkPipeline pipeline_2d_src[NUM_META_FS_KEYS]; 427 428 /** Pipeline that blits from a 3D image. */ 429 VkPipeline pipeline_3d_src[NUM_META_FS_KEYS]; 430 431 VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT]; 432 VkPipeline depth_only_1d_pipeline; 433 VkPipeline depth_only_2d_pipeline; 434 VkPipeline depth_only_3d_pipeline; 435 436 VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT]; 437 VkPipeline stencil_only_1d_pipeline; 438 VkPipeline stencil_only_2d_pipeline; 439 VkPipeline stencil_only_3d_pipeline; 440 VkPipelineLayout pipeline_layout; 441 VkDescriptorSetLayout ds_layout; 442 } blit; 443 444 struct { 445 VkRenderPass render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT]; 446 447 VkPipelineLayout p_layouts[3]; 448 VkDescriptorSetLayout ds_layouts[3]; 449 VkPipeline pipelines[3][NUM_META_FS_KEYS]; 450 451 VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT]; 452 VkPipeline depth_only_pipeline[3]; 453 454 VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT]; 455 VkPipeline stencil_only_pipeline[3]; 456 } blit2d; 457 458 struct { 459 VkPipelineLayout img_p_layout; 460 VkDescriptorSetLayout img_ds_layout; 461 VkPipeline pipeline; 462 VkPipeline pipeline_3d; 463 } itob; 464 struct { 465 VkPipelineLayout img_p_layout; 466 VkDescriptorSetLayout img_ds_layout; 467 VkPipeline pipeline; 468 VkPipeline pipeline_3d; 469 } btoi; 470 struct { 471 VkPipelineLayout img_p_layout; 472 VkDescriptorSetLayout img_ds_layout; 473 VkPipeline pipeline; 474 VkPipeline pipeline_3d; 475 } itoi; 476 struct { 477 VkPipelineLayout img_p_layout; 478 VkDescriptorSetLayout img_ds_layout; 479 VkPipeline pipeline; 480 VkPipeline pipeline_3d; 481 } cleari; 482 483 struct { 484 VkPipelineLayout p_layout; 485 VkPipeline pipeline[NUM_META_FS_KEYS]; 486 VkRenderPass pass[NUM_META_FS_KEYS]; 487 } resolve; 488 489 struct { 490 VkDescriptorSetLayout ds_layout; 491 VkPipelineLayout p_layout; 492 struct { 493 VkPipeline pipeline; 494 VkPipeline i_pipeline; 495 VkPipeline srgb_pipeline; 496 } rc[MAX_SAMPLES_LOG2]; 497 } resolve_compute; 498 499 struct { 500 VkDescriptorSetLayout ds_layout; 501 VkPipelineLayout p_layout; 502 503 struct { 504 VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT]; 505 VkPipeline pipeline[NUM_META_FS_KEYS]; 506 } rc[MAX_SAMPLES_LOG2]; 507 } resolve_fragment; 508 509 struct { 510 VkPipelineLayout p_layout; 511 VkPipeline decompress_pipeline; 512 VkPipeline resummarize_pipeline; 513 VkRenderPass pass; 514 } depth_decomp[1 + MAX_SAMPLES_LOG2]; 515 516 struct { 517 VkPipelineLayout p_layout; 518 VkPipeline cmask_eliminate_pipeline; 519 VkPipeline fmask_decompress_pipeline; 520 VkPipeline dcc_decompress_pipeline; 521 VkRenderPass pass; 522 523 VkDescriptorSetLayout dcc_decompress_compute_ds_layout; 524 VkPipelineLayout dcc_decompress_compute_p_layout; 525 VkPipeline dcc_decompress_compute_pipeline; 526 } fast_clear_flush; 527 528 struct { 529 VkPipelineLayout fill_p_layout; 530 VkPipelineLayout copy_p_layout; 531 VkDescriptorSetLayout fill_ds_layout; 532 VkDescriptorSetLayout copy_ds_layout; 533 VkPipeline fill_pipeline; 534 VkPipeline copy_pipeline; 535 } buffer; 536 537 struct { 538 VkDescriptorSetLayout ds_layout; 539 VkPipelineLayout p_layout; 540 VkPipeline occlusion_query_pipeline; 541 VkPipeline pipeline_statistics_query_pipeline; 542 } query; 543 }; 544 545 /* queue types */ 546 #define RADV_QUEUE_GENERAL 0 547 #define RADV_QUEUE_COMPUTE 1 548 #define RADV_QUEUE_TRANSFER 2 549 550 #define RADV_MAX_QUEUE_FAMILIES 3 551 552 enum ring_type radv_queue_family_to_ring(int f); 553 554 struct radv_queue { 555 VK_LOADER_DATA _loader_data; 556 struct radv_device * device; 557 struct radeon_winsys_ctx *hw_ctx; 558 enum radeon_ctx_priority priority; 559 uint32_t queue_family_index; 560 int queue_idx; 561 562 uint32_t scratch_size; 563 uint32_t compute_scratch_size; 564 uint32_t esgs_ring_size; 565 uint32_t gsvs_ring_size; 566 bool has_tess_rings; 567 bool has_sample_positions; 568 569 struct radeon_winsys_bo *scratch_bo; 570 struct radeon_winsys_bo *descriptor_bo; 571 struct radeon_winsys_bo *compute_scratch_bo; 572 struct radeon_winsys_bo *esgs_ring_bo; 573 struct radeon_winsys_bo *gsvs_ring_bo; 574 struct radeon_winsys_bo *tess_factor_ring_bo; 575 struct radeon_winsys_bo *tess_offchip_ring_bo; 576 struct radeon_winsys_cs *initial_preamble_cs; 577 struct radeon_winsys_cs *initial_full_flush_preamble_cs; 578 struct radeon_winsys_cs *continue_preamble_cs; 579 }; 580 581 struct radv_device { 582 VK_LOADER_DATA _loader_data; 583 584 VkAllocationCallbacks alloc; 585 586 struct radv_instance * instance; 587 struct radeon_winsys *ws; 588 589 struct radv_meta_state meta_state; 590 591 struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES]; 592 int queue_count[RADV_MAX_QUEUE_FAMILIES]; 593 struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES]; 594 595 bool always_use_syncobj; 596 bool llvm_supports_spill; 597 bool has_distributed_tess; 598 bool pbb_allowed; 599 bool dfsm_allowed; 600 uint32_t tess_offchip_block_dw_size; 601 uint32_t scratch_waves; 602 uint32_t dispatch_initiator; 603 604 uint32_t gs_table_depth; 605 606 /* MSAA sample locations. 607 * The first index is the sample index. 608 * The second index is the coordinate: X, Y. */ 609 float sample_locations_1x[1][2]; 610 float sample_locations_2x[2][2]; 611 float sample_locations_4x[4][2]; 612 float sample_locations_8x[8][2]; 613 float sample_locations_16x[16][2]; 614 615 /* CIK and later */ 616 uint32_t gfx_init_size_dw; 617 struct radeon_winsys_bo *gfx_init; 618 619 struct radeon_winsys_bo *trace_bo; 620 uint32_t *trace_id_ptr; 621 622 /* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */ 623 bool keep_shader_info; 624 625 struct radv_physical_device *physical_device; 626 627 /* Backup in-memory cache to be used if the app doesn't provide one */ 628 struct radv_pipeline_cache * mem_cache; 629 630 /* 631 * use different counters so MSAA MRTs get consecutive surface indices, 632 * even if MASK is allocated in between. 633 */ 634 uint32_t image_mrt_offset_counter; 635 uint32_t fmask_mrt_offset_counter; 636 struct list_head shader_slabs; 637 mtx_t shader_slab_mutex; 638 639 /* For detecting VM faults reported by dmesg. */ 640 uint64_t dmesg_timestamp; 641 }; 642 643 struct radv_device_memory { 644 struct radeon_winsys_bo *bo; 645 /* for dedicated allocations */ 646 struct radv_image *image; 647 struct radv_buffer *buffer; 648 uint32_t type_index; 649 VkDeviceSize map_size; 650 void * map; 651 }; 652 653 654 struct radv_descriptor_range { 655 uint64_t va; 656 uint32_t size; 657 }; 658 659 struct radv_descriptor_set { 660 const struct radv_descriptor_set_layout *layout; 661 uint32_t size; 662 663 struct radeon_winsys_bo *bo; 664 uint64_t va; 665 uint32_t *mapped_ptr; 666 struct radv_descriptor_range *dynamic_descriptors; 667 668 struct radeon_winsys_bo *descriptors[0]; 669 }; 670 671 struct radv_push_descriptor_set 672 { 673 struct radv_descriptor_set set; 674 uint32_t capacity; 675 }; 676 677 struct radv_descriptor_pool_entry { 678 uint32_t offset; 679 uint32_t size; 680 struct radv_descriptor_set *set; 681 }; 682 683 struct radv_descriptor_pool { 684 struct radeon_winsys_bo *bo; 685 uint8_t *mapped_ptr; 686 uint64_t current_offset; 687 uint64_t size; 688 689 uint8_t *host_memory_base; 690 uint8_t *host_memory_ptr; 691 uint8_t *host_memory_end; 692 693 uint32_t entry_count; 694 uint32_t max_entry_count; 695 struct radv_descriptor_pool_entry entries[0]; 696 }; 697 698 struct radv_descriptor_update_template_entry { 699 VkDescriptorType descriptor_type; 700 701 /* The number of descriptors to update */ 702 uint32_t descriptor_count; 703 704 /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */ 705 uint32_t dst_offset; 706 707 /* In dwords. Not valid/used for dynamic descriptors */ 708 uint32_t dst_stride; 709 710 uint32_t buffer_offset; 711 712 /* Only valid for combined image samplers and samplers */ 713 uint16_t has_sampler; 714 715 /* In bytes */ 716 size_t src_offset; 717 size_t src_stride; 718 719 /* For push descriptors */ 720 const uint32_t *immutable_samplers; 721 }; 722 723 struct radv_descriptor_update_template { 724 uint32_t entry_count; 725 struct radv_descriptor_update_template_entry entry[0]; 726 }; 727 728 struct radv_buffer { 729 struct radv_device * device; 730 VkDeviceSize size; 731 732 VkBufferUsageFlags usage; 733 VkBufferCreateFlags flags; 734 735 /* Set when bound */ 736 struct radeon_winsys_bo * bo; 737 VkDeviceSize offset; 738 739 bool shareable; 740 }; 741 742 enum radv_dynamic_state_bits { 743 RADV_DYNAMIC_VIEWPORT = 1 << 0, 744 RADV_DYNAMIC_SCISSOR = 1 << 1, 745 RADV_DYNAMIC_LINE_WIDTH = 1 << 2, 746 RADV_DYNAMIC_DEPTH_BIAS = 1 << 3, 747 RADV_DYNAMIC_BLEND_CONSTANTS = 1 << 4, 748 RADV_DYNAMIC_DEPTH_BOUNDS = 1 << 5, 749 RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, 750 RADV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, 751 RADV_DYNAMIC_STENCIL_REFERENCE = 1 << 8, 752 RADV_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, 753 RADV_DYNAMIC_ALL = (1 << 10) - 1, 754 }; 755 756 enum radv_cmd_dirty_bits { 757 /* Keep the dynamic state dirty bits in sync with 758 * enum radv_dynamic_state_bits */ 759 RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, 760 RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, 761 RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, 762 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, 763 RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, 764 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, 765 RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, 766 RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, 767 RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, 768 RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, 769 RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 10) - 1, 770 RADV_CMD_DIRTY_PIPELINE = 1 << 10, 771 RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 11, 772 RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 12, 773 RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 13, 774 }; 775 776 enum radv_cmd_flush_bits { 777 RADV_CMD_FLAG_INV_ICACHE = 1 << 0, 778 /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */ 779 RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1, 780 /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */ 781 RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2, 782 /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ 783 RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3, 784 /* Same as above, but only writes back and doesn't invalidate */ 785 RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4, 786 /* Framebuffer caches */ 787 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5, 788 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6, 789 RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7, 790 RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8, 791 /* Engine synchronization. */ 792 RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9, 793 RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10, 794 RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11, 795 RADV_CMD_FLAG_VGT_FLUSH = 1 << 12, 796 797 RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | 798 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | 799 RADV_CMD_FLAG_FLUSH_AND_INV_DB | 800 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) 801 }; 802 803 struct radv_vertex_binding { 804 struct radv_buffer * buffer; 805 VkDeviceSize offset; 806 }; 807 808 struct radv_viewport_state { 809 uint32_t count; 810 VkViewport viewports[MAX_VIEWPORTS]; 811 }; 812 813 struct radv_scissor_state { 814 uint32_t count; 815 VkRect2D scissors[MAX_SCISSORS]; 816 }; 817 818 struct radv_discard_rectangle_state { 819 uint32_t count; 820 VkRect2D rectangles[MAX_DISCARD_RECTANGLES]; 821 }; 822 823 struct radv_dynamic_state { 824 /** 825 * Bitmask of (1 << VK_DYNAMIC_STATE_*). 826 * Defines the set of saved dynamic state. 827 */ 828 uint32_t mask; 829 830 struct radv_viewport_state viewport; 831 832 struct radv_scissor_state scissor; 833 834 float line_width; 835 836 struct { 837 float bias; 838 float clamp; 839 float slope; 840 } depth_bias; 841 842 float blend_constants[4]; 843 844 struct { 845 float min; 846 float max; 847 } depth_bounds; 848 849 struct { 850 uint32_t front; 851 uint32_t back; 852 } stencil_compare_mask; 853 854 struct { 855 uint32_t front; 856 uint32_t back; 857 } stencil_write_mask; 858 859 struct { 860 uint32_t front; 861 uint32_t back; 862 } stencil_reference; 863 864 struct radv_discard_rectangle_state discard_rectangle; 865 }; 866 867 extern const struct radv_dynamic_state default_dynamic_state; 868 869 const char * 870 radv_get_debug_option_name(int id); 871 872 const char * 873 radv_get_perftest_option_name(int id); 874 875 /** 876 * Attachment state when recording a renderpass instance. 877 * 878 * The clear value is valid only if there exists a pending clear. 879 */ 880 struct radv_attachment_state { 881 VkImageAspectFlags pending_clear_aspects; 882 uint32_t cleared_views; 883 VkClearValue clear_value; 884 VkImageLayout current_layout; 885 }; 886 887 struct radv_cmd_state { 888 /* Vertex descriptors */ 889 bool vb_prefetch_dirty; 890 uint64_t vb_va; 891 unsigned vb_size; 892 893 bool push_descriptors_dirty; 894 bool predicating; 895 uint32_t dirty; 896 897 struct radv_pipeline * pipeline; 898 struct radv_pipeline * emitted_pipeline; 899 struct radv_pipeline * compute_pipeline; 900 struct radv_pipeline * emitted_compute_pipeline; 901 struct radv_framebuffer * framebuffer; 902 struct radv_render_pass * pass; 903 const struct radv_subpass * subpass; 904 struct radv_dynamic_state dynamic; 905 struct radv_attachment_state * attachments; 906 VkRect2D render_area; 907 908 /* Index buffer */ 909 struct radv_buffer *index_buffer; 910 uint64_t index_offset; 911 uint32_t index_type; 912 uint32_t max_index_count; 913 uint64_t index_va; 914 int32_t last_index_type; 915 916 int32_t last_primitive_reset_en; 917 uint32_t last_primitive_reset_index; 918 enum radv_cmd_flush_bits flush_bits; 919 unsigned active_occlusion_queries; 920 float offset_scale; 921 uint32_t descriptors_dirty; 922 uint32_t valid_descriptors; 923 uint32_t trace_id; 924 uint32_t last_ia_multi_vgt_param; 925 926 uint32_t last_num_instances; 927 uint32_t last_first_instance; 928 uint32_t last_vertex_offset; 929 }; 930 931 struct radv_cmd_pool { 932 VkAllocationCallbacks alloc; 933 struct list_head cmd_buffers; 934 struct list_head free_cmd_buffers; 935 uint32_t queue_family_index; 936 }; 937 938 struct radv_cmd_buffer_upload { 939 uint8_t *map; 940 unsigned offset; 941 uint64_t size; 942 struct radeon_winsys_bo *upload_bo; 943 struct list_head list; 944 }; 945 946 enum radv_cmd_buffer_status { 947 RADV_CMD_BUFFER_STATUS_INVALID, 948 RADV_CMD_BUFFER_STATUS_INITIAL, 949 RADV_CMD_BUFFER_STATUS_RECORDING, 950 RADV_CMD_BUFFER_STATUS_EXECUTABLE, 951 RADV_CMD_BUFFER_STATUS_PENDING, 952 }; 953 954 struct radv_cmd_buffer { 955 VK_LOADER_DATA _loader_data; 956 957 struct radv_device * device; 958 959 struct radv_cmd_pool * pool; 960 struct list_head pool_link; 961 962 VkCommandBufferUsageFlags usage_flags; 963 VkCommandBufferLevel level; 964 enum radv_cmd_buffer_status status; 965 struct radeon_winsys_cs *cs; 966 struct radv_cmd_state state; 967 struct radv_vertex_binding vertex_bindings[MAX_VBS]; 968 uint32_t queue_family_index; 969 970 uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE]; 971 uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS]; 972 VkShaderStageFlags push_constant_stages; 973 struct radv_push_descriptor_set push_descriptors; 974 struct radv_descriptor_set meta_push_descriptors; 975 struct radv_descriptor_set *descriptors[MAX_SETS]; 976 977 struct radv_cmd_buffer_upload upload; 978 979 uint32_t scratch_size_needed; 980 uint32_t compute_scratch_size_needed; 981 uint32_t esgs_ring_size_needed; 982 uint32_t gsvs_ring_size_needed; 983 bool tess_rings_needed; 984 bool sample_positions_needed; 985 986 VkResult record_result; 987 988 int ring_offsets_idx; /* just used for verification */ 989 uint32_t gfx9_fence_offset; 990 struct radeon_winsys_bo *gfx9_fence_bo; 991 uint32_t gfx9_fence_idx; 992 993 /** 994 * Whether a query pool has been resetted and we have to flush caches. 995 */ 996 bool pending_reset_query; 997 }; 998 999 struct radv_image; 1000 1001 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer); 1002 1003 void si_init_compute(struct radv_cmd_buffer *cmd_buffer); 1004 void si_init_config(struct radv_cmd_buffer *cmd_buffer); 1005 1006 void cik_create_gfx_config(struct radv_device *device); 1007 1008 void si_write_viewport(struct radeon_winsys_cs *cs, int first_vp, 1009 int count, const VkViewport *viewports); 1010 void si_write_scissors(struct radeon_winsys_cs *cs, int first, 1011 int count, const VkRect2D *scissors, 1012 const VkViewport *viewports, bool can_use_guardband); 1013 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, 1014 bool instanced_draw, bool indirect_draw, 1015 uint32_t draw_vertex_count); 1016 void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs, 1017 bool predicated, 1018 enum chip_class chip_class, 1019 bool is_mec, 1020 unsigned event, unsigned event_flags, 1021 unsigned data_sel, 1022 uint64_t va, 1023 uint32_t old_fence, 1024 uint32_t new_fence); 1025 1026 void si_emit_wait_fence(struct radeon_winsys_cs *cs, 1027 bool predicated, 1028 uint64_t va, uint32_t ref, 1029 uint32_t mask); 1030 void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, 1031 enum chip_class chip_class, 1032 uint32_t *fence_ptr, uint64_t va, 1033 bool is_mec, 1034 enum radv_cmd_flush_bits flush_bits); 1035 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer); 1036 void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va); 1037 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, 1038 uint64_t src_va, uint64_t dest_va, 1039 uint64_t size); 1040 void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, 1041 unsigned size); 1042 void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, 1043 uint64_t size, unsigned value); 1044 void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer); 1045 bool 1046 radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, 1047 unsigned size, 1048 unsigned alignment, 1049 unsigned *out_offset, 1050 void **ptr); 1051 void 1052 radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, 1053 const struct radv_subpass *subpass, 1054 bool transitions); 1055 bool 1056 radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, 1057 unsigned size, unsigned alignmnet, 1058 const void *data, unsigned *out_offset); 1059 1060 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer); 1061 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer); 1062 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer); 1063 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer); 1064 void radv_cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples); 1065 unsigned radv_cayman_get_maxdist(int log_samples); 1066 void radv_device_init_msaa(struct radv_device *device); 1067 void radv_set_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer, 1068 struct radv_image *image, 1069 VkClearDepthStencilValue ds_clear_value, 1070 VkImageAspectFlags aspects); 1071 void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer, 1072 struct radv_image *image, 1073 int idx, 1074 uint32_t color_values[2]); 1075 void radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer, 1076 struct radv_image *image, 1077 bool value); 1078 uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, 1079 struct radeon_winsys_bo *bo, 1080 uint64_t offset, uint64_t size, uint32_t value); 1081 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer); 1082 bool radv_get_memory_fd(struct radv_device *device, 1083 struct radv_device_memory *memory, 1084 int *pFD); 1085 1086 /* 1087 * Takes x,y,z as exact numbers of invocations, instead of blocks. 1088 * 1089 * Limitations: Can't call normal dispatch functions without binding or rebinding 1090 * the compute pipeline. 1091 */ 1092 void radv_unaligned_dispatch( 1093 struct radv_cmd_buffer *cmd_buffer, 1094 uint32_t x, 1095 uint32_t y, 1096 uint32_t z); 1097 1098 struct radv_event { 1099 struct radeon_winsys_bo *bo; 1100 uint64_t *map; 1101 }; 1102 1103 struct radv_shader_module; 1104 1105 #define RADV_HASH_SHADER_IS_GEOM_COPY_SHADER (1 << 0) 1106 #define RADV_HASH_SHADER_SISCHED (1 << 1) 1107 #define RADV_HASH_SHADER_UNSAFE_MATH (1 << 2) 1108 void 1109 radv_hash_shaders(unsigned char *hash, 1110 const VkPipelineShaderStageCreateInfo **stages, 1111 const struct radv_pipeline_layout *layout, 1112 const struct radv_pipeline_key *key, 1113 uint32_t flags); 1114 1115 static inline gl_shader_stage 1116 vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) 1117 { 1118 assert(__builtin_popcount(vk_stage) == 1); 1119 return ffs(vk_stage) - 1; 1120 } 1121 1122 static inline VkShaderStageFlagBits 1123 mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) 1124 { 1125 return (1 << mesa_stage); 1126 } 1127 1128 #define RADV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) 1129 1130 #define radv_foreach_stage(stage, stage_bits) \ 1131 for (gl_shader_stage stage, \ 1132 __tmp = (gl_shader_stage)((stage_bits) & RADV_STAGE_MASK); \ 1133 stage = __builtin_ffs(__tmp) - 1, __tmp; \ 1134 __tmp &= ~(1 << (stage))) 1135 1136 struct radv_depth_stencil_state { 1137 uint32_t db_depth_control; 1138 uint32_t db_stencil_control; 1139 uint32_t db_render_control; 1140 uint32_t db_render_override2; 1141 }; 1142 1143 struct radv_blend_state { 1144 uint32_t cb_color_control; 1145 uint32_t cb_target_mask; 1146 uint32_t sx_mrt_blend_opt[8]; 1147 uint32_t cb_blend_control[8]; 1148 1149 uint32_t spi_shader_col_format; 1150 uint32_t cb_shader_mask; 1151 uint32_t db_alpha_to_mask; 1152 }; 1153 1154 unsigned radv_format_meta_fs_key(VkFormat format); 1155 1156 struct radv_raster_state { 1157 uint32_t pa_cl_clip_cntl; 1158 uint32_t spi_interp_control; 1159 uint32_t pa_su_vtx_cntl; 1160 uint32_t pa_su_sc_mode_cntl; 1161 }; 1162 1163 struct radv_multisample_state { 1164 uint32_t db_eqaa; 1165 uint32_t pa_sc_line_cntl; 1166 uint32_t pa_sc_mode_cntl_0; 1167 uint32_t pa_sc_mode_cntl_1; 1168 uint32_t pa_sc_aa_config; 1169 uint32_t pa_sc_aa_mask[2]; 1170 unsigned num_samples; 1171 }; 1172 1173 struct radv_prim_vertex_count { 1174 uint8_t min; 1175 uint8_t incr; 1176 }; 1177 1178 struct radv_tessellation_state { 1179 uint32_t ls_hs_config; 1180 uint32_t tcs_in_layout; 1181 uint32_t tcs_out_layout; 1182 uint32_t tcs_out_offsets; 1183 uint32_t offchip_layout; 1184 unsigned num_patches; 1185 unsigned lds_size; 1186 unsigned num_tcs_input_cp; 1187 uint32_t tf_param; 1188 }; 1189 1190 struct radv_gs_state { 1191 uint32_t vgt_gs_onchip_cntl; 1192 uint32_t vgt_gs_max_prims_per_subgroup; 1193 uint32_t vgt_esgs_ring_itemsize; 1194 uint32_t lds_size; 1195 }; 1196 1197 struct radv_vertex_elements_info { 1198 uint32_t rsrc_word3[MAX_VERTEX_ATTRIBS]; 1199 uint32_t format_size[MAX_VERTEX_ATTRIBS]; 1200 uint32_t binding[MAX_VERTEX_ATTRIBS]; 1201 uint32_t offset[MAX_VERTEX_ATTRIBS]; 1202 uint32_t count; 1203 }; 1204 1205 struct radv_vs_state { 1206 uint32_t pa_cl_vs_out_cntl; 1207 uint32_t spi_shader_pos_format; 1208 uint32_t spi_vs_out_config; 1209 uint32_t vgt_reuse_off; 1210 }; 1211 1212 struct radv_binning_state { 1213 uint32_t pa_sc_binner_cntl_0; 1214 uint32_t db_dfsm_control; 1215 }; 1216 1217 #define SI_GS_PER_ES 128 1218 1219 struct radv_pipeline { 1220 struct radv_device * device; 1221 struct radv_dynamic_state dynamic_state; 1222 1223 struct radv_pipeline_layout * layout; 1224 1225 bool needs_data_cache; 1226 bool need_indirect_descriptor_sets; 1227 struct radv_shader_variant * shaders[MESA_SHADER_STAGES]; 1228 struct radv_shader_variant *gs_copy_shader; 1229 VkShaderStageFlags active_stages; 1230 1231 struct radv_vertex_elements_info vertex_elements; 1232 1233 uint32_t binding_stride[MAX_VBS]; 1234 1235 uint32_t user_data_0[MESA_SHADER_STAGES]; 1236 union { 1237 struct { 1238 struct radv_blend_state blend; 1239 struct radv_depth_stencil_state ds; 1240 struct radv_raster_state raster; 1241 struct radv_multisample_state ms; 1242 struct radv_tessellation_state tess; 1243 struct radv_gs_state gs; 1244 struct radv_vs_state vs; 1245 struct radv_binning_state bin; 1246 uint32_t db_shader_control; 1247 uint32_t shader_z_format; 1248 uint32_t spi_baryc_cntl; 1249 unsigned prim; 1250 unsigned gs_out; 1251 uint32_t vgt_gs_mode; 1252 bool vgt_primitiveid_en; 1253 bool prim_restart_enable; 1254 bool partial_es_wave; 1255 uint8_t primgroup_size; 1256 unsigned esgs_ring_size; 1257 unsigned gsvs_ring_size; 1258 uint32_t ps_input_cntl[32]; 1259 uint32_t ps_input_cntl_num; 1260 uint32_t vgt_shader_stages_en; 1261 uint32_t vtx_base_sgpr; 1262 uint32_t base_ia_multi_vgt_param; 1263 bool wd_switch_on_eop; 1264 bool ia_switch_on_eoi; 1265 bool partial_vs_wave; 1266 uint8_t vtx_emit_num; 1267 uint32_t vtx_reuse_depth; 1268 struct radv_prim_vertex_count prim_vertex_count; 1269 bool can_use_guardband; 1270 uint32_t pa_sc_cliprect_rule; 1271 } graphics; 1272 }; 1273 1274 unsigned max_waves; 1275 unsigned scratch_bytes_per_wave; 1276 }; 1277 1278 static inline bool radv_pipeline_has_gs(struct radv_pipeline *pipeline) 1279 { 1280 return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false; 1281 } 1282 1283 static inline bool radv_pipeline_has_tess(struct radv_pipeline *pipeline) 1284 { 1285 return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false; 1286 } 1287 1288 struct ac_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline, 1289 gl_shader_stage stage, 1290 int idx); 1291 1292 struct radv_shader_variant *radv_get_vertex_shader(struct radv_pipeline *pipeline); 1293 1294 struct radv_graphics_pipeline_create_info { 1295 bool use_rectlist; 1296 bool db_depth_clear; 1297 bool db_stencil_clear; 1298 bool db_depth_disable_expclear; 1299 bool db_stencil_disable_expclear; 1300 bool db_flush_depth_inplace; 1301 bool db_flush_stencil_inplace; 1302 bool db_resummarize; 1303 uint32_t custom_blend_mode; 1304 }; 1305 1306 VkResult 1307 radv_graphics_pipeline_create(VkDevice device, 1308 VkPipelineCache cache, 1309 const VkGraphicsPipelineCreateInfo *pCreateInfo, 1310 const struct radv_graphics_pipeline_create_info *extra, 1311 const VkAllocationCallbacks *alloc, 1312 VkPipeline *pPipeline); 1313 1314 struct vk_format_description; 1315 uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *desc, 1316 int first_non_void); 1317 uint32_t radv_translate_buffer_numformat(const struct vk_format_description *desc, 1318 int first_non_void); 1319 uint32_t radv_translate_colorformat(VkFormat format); 1320 uint32_t radv_translate_color_numformat(VkFormat format, 1321 const struct vk_format_description *desc, 1322 int first_non_void); 1323 uint32_t radv_colorformat_endian_swap(uint32_t colorformat); 1324 unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap); 1325 uint32_t radv_translate_dbformat(VkFormat format); 1326 uint32_t radv_translate_tex_dataformat(VkFormat format, 1327 const struct vk_format_description *desc, 1328 int first_non_void); 1329 uint32_t radv_translate_tex_numformat(VkFormat format, 1330 const struct vk_format_description *desc, 1331 int first_non_void); 1332 bool radv_format_pack_clear_color(VkFormat format, 1333 uint32_t clear_vals[2], 1334 VkClearColorValue *value); 1335 bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable); 1336 bool radv_dcc_formats_compatible(VkFormat format1, 1337 VkFormat format2); 1338 1339 struct radv_fmask_info { 1340 uint64_t offset; 1341 uint64_t size; 1342 unsigned alignment; 1343 unsigned pitch_in_pixels; 1344 unsigned bank_height; 1345 unsigned slice_tile_max; 1346 unsigned tile_mode_index; 1347 unsigned tile_swizzle; 1348 }; 1349 1350 struct radv_cmask_info { 1351 uint64_t offset; 1352 uint64_t size; 1353 unsigned alignment; 1354 unsigned slice_tile_max; 1355 }; 1356 1357 struct radv_image { 1358 VkImageType type; 1359 /* The original VkFormat provided by the client. This may not match any 1360 * of the actual surface formats. 1361 */ 1362 VkFormat vk_format; 1363 VkImageAspectFlags aspects; 1364 VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ 1365 struct ac_surf_info info; 1366 VkImageTiling tiling; /** VkImageCreateInfo::tiling */ 1367 VkImageCreateFlags flags; /** VkImageCreateInfo::flags */ 1368 1369 VkDeviceSize size; 1370 uint32_t alignment; 1371 1372 unsigned queue_family_mask; 1373 bool exclusive; 1374 bool shareable; 1375 1376 /* Set when bound */ 1377 struct radeon_winsys_bo *bo; 1378 VkDeviceSize offset; 1379 uint64_t dcc_offset; 1380 uint64_t htile_offset; 1381 bool tc_compatible_htile; 1382 struct radeon_surf surface; 1383 1384 struct radv_fmask_info fmask; 1385 struct radv_cmask_info cmask; 1386 uint64_t clear_value_offset; 1387 uint64_t dcc_pred_offset; 1388 1389 /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */ 1390 VkDeviceMemory owned_memory; 1391 }; 1392 1393 /* Whether the image has a htile that is known consistent with the contents of 1394 * the image. */ 1395 bool radv_layout_has_htile(const struct radv_image *image, 1396 VkImageLayout layout, 1397 unsigned queue_mask); 1398 1399 /* Whether the image has a htile that is known consistent with the contents of 1400 * the image and is allowed to be in compressed form. 1401 * 1402 * If this is false reads that don't use the htile should be able to return 1403 * correct results. 1404 */ 1405 bool radv_layout_is_htile_compressed(const struct radv_image *image, 1406 VkImageLayout layout, 1407 unsigned queue_mask); 1408 1409 bool radv_layout_can_fast_clear(const struct radv_image *image, 1410 VkImageLayout layout, 1411 unsigned queue_mask); 1412 1413 bool radv_layout_dcc_compressed(const struct radv_image *image, 1414 VkImageLayout layout, 1415 unsigned queue_mask); 1416 1417 static inline bool 1418 radv_vi_dcc_enabled(const struct radv_image *image, unsigned level) 1419 { 1420 return image->surface.dcc_size && level < image->surface.num_dcc_levels; 1421 } 1422 1423 static inline bool 1424 radv_htile_enabled(const struct radv_image *image, unsigned level) 1425 { 1426 return image->surface.htile_size && level == 0; 1427 } 1428 1429 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family); 1430 1431 static inline uint32_t 1432 radv_get_layerCount(const struct radv_image *image, 1433 const VkImageSubresourceRange *range) 1434 { 1435 return range->layerCount == VK_REMAINING_ARRAY_LAYERS ? 1436 image->info.array_size - range->baseArrayLayer : range->layerCount; 1437 } 1438 1439 static inline uint32_t 1440 radv_get_levelCount(const struct radv_image *image, 1441 const VkImageSubresourceRange *range) 1442 { 1443 return range->levelCount == VK_REMAINING_MIP_LEVELS ? 1444 image->info.levels - range->baseMipLevel : range->levelCount; 1445 } 1446 1447 struct radeon_bo_metadata; 1448 void 1449 radv_init_metadata(struct radv_device *device, 1450 struct radv_image *image, 1451 struct radeon_bo_metadata *metadata); 1452 1453 struct radv_image_view { 1454 struct radv_image *image; /**< VkImageViewCreateInfo::image */ 1455 struct radeon_winsys_bo *bo; 1456 1457 VkImageViewType type; 1458 VkImageAspectFlags aspect_mask; 1459 VkFormat vk_format; 1460 uint32_t base_layer; 1461 uint32_t layer_count; 1462 uint32_t base_mip; 1463 uint32_t level_count; 1464 VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ 1465 1466 uint32_t descriptor[16]; 1467 1468 /* Descriptor for use as a storage image as opposed to a sampled image. 1469 * This has a few differences for cube maps (e.g. type). 1470 */ 1471 uint32_t storage_descriptor[16]; 1472 }; 1473 1474 struct radv_image_create_info { 1475 const VkImageCreateInfo *vk_info; 1476 bool scanout; 1477 bool no_metadata_planes; 1478 }; 1479 1480 VkResult radv_image_create(VkDevice _device, 1481 const struct radv_image_create_info *info, 1482 const VkAllocationCallbacks* alloc, 1483 VkImage *pImage); 1484 1485 VkResult 1486 radv_image_from_gralloc(VkDevice device_h, 1487 const VkImageCreateInfo *base_info, 1488 const VkNativeBufferANDROID *gralloc_info, 1489 const VkAllocationCallbacks *alloc, 1490 VkImage *out_image_h); 1491 1492 void radv_image_view_init(struct radv_image_view *view, 1493 struct radv_device *device, 1494 const VkImageViewCreateInfo* pCreateInfo); 1495 1496 struct radv_buffer_view { 1497 struct radeon_winsys_bo *bo; 1498 VkFormat vk_format; 1499 uint64_t range; /**< VkBufferViewCreateInfo::range */ 1500 uint32_t state[4]; 1501 }; 1502 void radv_buffer_view_init(struct radv_buffer_view *view, 1503 struct radv_device *device, 1504 const VkBufferViewCreateInfo* pCreateInfo); 1505 1506 static inline struct VkExtent3D 1507 radv_sanitize_image_extent(const VkImageType imageType, 1508 const struct VkExtent3D imageExtent) 1509 { 1510 switch (imageType) { 1511 case VK_IMAGE_TYPE_1D: 1512 return (VkExtent3D) { imageExtent.width, 1, 1 }; 1513 case VK_IMAGE_TYPE_2D: 1514 return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 }; 1515 case VK_IMAGE_TYPE_3D: 1516 return imageExtent; 1517 default: 1518 unreachable("invalid image type"); 1519 } 1520 } 1521 1522 static inline struct VkOffset3D 1523 radv_sanitize_image_offset(const VkImageType imageType, 1524 const struct VkOffset3D imageOffset) 1525 { 1526 switch (imageType) { 1527 case VK_IMAGE_TYPE_1D: 1528 return (VkOffset3D) { imageOffset.x, 0, 0 }; 1529 case VK_IMAGE_TYPE_2D: 1530 return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 }; 1531 case VK_IMAGE_TYPE_3D: 1532 return imageOffset; 1533 default: 1534 unreachable("invalid image type"); 1535 } 1536 } 1537 1538 static inline bool 1539 radv_image_extent_compare(const struct radv_image *image, 1540 const VkExtent3D *extent) 1541 { 1542 if (extent->width != image->info.width || 1543 extent->height != image->info.height || 1544 extent->depth != image->info.depth) 1545 return false; 1546 return true; 1547 } 1548 1549 struct radv_sampler { 1550 uint32_t state[4]; 1551 }; 1552 1553 struct radv_color_buffer_info { 1554 uint64_t cb_color_base; 1555 uint64_t cb_color_cmask; 1556 uint64_t cb_color_fmask; 1557 uint64_t cb_dcc_base; 1558 uint32_t cb_color_pitch; 1559 uint32_t cb_color_slice; 1560 uint32_t cb_color_view; 1561 uint32_t cb_color_info; 1562 uint32_t cb_color_attrib; 1563 uint32_t cb_color_attrib2; 1564 uint32_t cb_dcc_control; 1565 uint32_t cb_color_cmask_slice; 1566 uint32_t cb_color_fmask_slice; 1567 }; 1568 1569 struct radv_ds_buffer_info { 1570 uint64_t db_z_read_base; 1571 uint64_t db_stencil_read_base; 1572 uint64_t db_z_write_base; 1573 uint64_t db_stencil_write_base; 1574 uint64_t db_htile_data_base; 1575 uint32_t db_depth_info; 1576 uint32_t db_z_info; 1577 uint32_t db_stencil_info; 1578 uint32_t db_depth_view; 1579 uint32_t db_depth_size; 1580 uint32_t db_depth_slice; 1581 uint32_t db_htile_surface; 1582 uint32_t pa_su_poly_offset_db_fmt_cntl; 1583 uint32_t db_z_info2; 1584 uint32_t db_stencil_info2; 1585 float offset_scale; 1586 }; 1587 1588 struct radv_attachment_info { 1589 union { 1590 struct radv_color_buffer_info cb; 1591 struct radv_ds_buffer_info ds; 1592 }; 1593 struct radv_image_view *attachment; 1594 }; 1595 1596 struct radv_framebuffer { 1597 uint32_t width; 1598 uint32_t height; 1599 uint32_t layers; 1600 1601 uint32_t attachment_count; 1602 struct radv_attachment_info attachments[0]; 1603 }; 1604 1605 struct radv_subpass_barrier { 1606 VkPipelineStageFlags src_stage_mask; 1607 VkAccessFlags src_access_mask; 1608 VkAccessFlags dst_access_mask; 1609 }; 1610 1611 struct radv_subpass { 1612 uint32_t input_count; 1613 uint32_t color_count; 1614 VkAttachmentReference * input_attachments; 1615 VkAttachmentReference * color_attachments; 1616 VkAttachmentReference * resolve_attachments; 1617 VkAttachmentReference depth_stencil_attachment; 1618 1619 /** Subpass has at least one resolve attachment */ 1620 bool has_resolve; 1621 1622 struct radv_subpass_barrier start_barrier; 1623 1624 uint32_t view_mask; 1625 }; 1626 1627 struct radv_render_pass_attachment { 1628 VkFormat format; 1629 uint32_t samples; 1630 VkAttachmentLoadOp load_op; 1631 VkAttachmentLoadOp stencil_load_op; 1632 VkImageLayout initial_layout; 1633 VkImageLayout final_layout; 1634 uint32_t view_mask; 1635 }; 1636 1637 struct radv_render_pass { 1638 uint32_t attachment_count; 1639 uint32_t subpass_count; 1640 VkAttachmentReference * subpass_attachments; 1641 struct radv_render_pass_attachment * attachments; 1642 struct radv_subpass_barrier end_barrier; 1643 struct radv_subpass subpasses[0]; 1644 }; 1645 1646 VkResult radv_device_init_meta(struct radv_device *device); 1647 void radv_device_finish_meta(struct radv_device *device); 1648 1649 struct radv_query_pool { 1650 struct radeon_winsys_bo *bo; 1651 uint32_t stride; 1652 uint32_t availability_offset; 1653 char *ptr; 1654 VkQueryType type; 1655 uint32_t pipeline_stats_mask; 1656 }; 1657 1658 struct radv_semaphore { 1659 /* use a winsys sem for non-exportable */ 1660 struct radeon_winsys_sem *sem; 1661 uint32_t syncobj; 1662 uint32_t temp_syncobj; 1663 }; 1664 1665 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info, 1666 int num_wait_sems, 1667 const VkSemaphore *wait_sems, 1668 int num_signal_sems, 1669 const VkSemaphore *signal_sems, 1670 VkFence fence); 1671 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info); 1672 1673 void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, 1674 struct radv_descriptor_set *set, 1675 unsigned idx); 1676 1677 void 1678 radv_update_descriptor_sets(struct radv_device *device, 1679 struct radv_cmd_buffer *cmd_buffer, 1680 VkDescriptorSet overrideSet, 1681 uint32_t descriptorWriteCount, 1682 const VkWriteDescriptorSet *pDescriptorWrites, 1683 uint32_t descriptorCopyCount, 1684 const VkCopyDescriptorSet *pDescriptorCopies); 1685 1686 void 1687 radv_update_descriptor_set_with_template(struct radv_device *device, 1688 struct radv_cmd_buffer *cmd_buffer, 1689 struct radv_descriptor_set *set, 1690 VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, 1691 const void *pData); 1692 1693 void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, 1694 VkPipelineBindPoint pipelineBindPoint, 1695 VkPipelineLayout _layout, 1696 uint32_t set, 1697 uint32_t descriptorWriteCount, 1698 const VkWriteDescriptorSet *pDescriptorWrites); 1699 1700 void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer, 1701 struct radv_image *image, uint32_t value); 1702 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, 1703 struct radv_image *image, uint32_t value); 1704 1705 struct radv_fence { 1706 struct radeon_winsys_fence *fence; 1707 bool submitted; 1708 bool signalled; 1709 1710 uint32_t syncobj; 1711 uint32_t temp_syncobj; 1712 }; 1713 1714 struct radeon_winsys_sem; 1715 1716 #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \ 1717 \ 1718 static inline struct __radv_type * \ 1719 __radv_type ## _from_handle(__VkType _handle) \ 1720 { \ 1721 return (struct __radv_type *) _handle; \ 1722 } \ 1723 \ 1724 static inline __VkType \ 1725 __radv_type ## _to_handle(struct __radv_type *_obj) \ 1726 { \ 1727 return (__VkType) _obj; \ 1728 } 1729 1730 #define RADV_DEFINE_NONDISP_HANDLE_CASTS(__radv_type, __VkType) \ 1731 \ 1732 static inline struct __radv_type * \ 1733 __radv_type ## _from_handle(__VkType _handle) \ 1734 { \ 1735 return (struct __radv_type *)(uintptr_t) _handle; \ 1736 } \ 1737 \ 1738 static inline __VkType \ 1739 __radv_type ## _to_handle(struct __radv_type *_obj) \ 1740 { \ 1741 return (__VkType)(uintptr_t) _obj; \ 1742 } 1743 1744 #define RADV_FROM_HANDLE(__radv_type, __name, __handle) \ 1745 struct __radv_type *__name = __radv_type ## _from_handle(__handle) 1746 1747 RADV_DEFINE_HANDLE_CASTS(radv_cmd_buffer, VkCommandBuffer) 1748 RADV_DEFINE_HANDLE_CASTS(radv_device, VkDevice) 1749 RADV_DEFINE_HANDLE_CASTS(radv_instance, VkInstance) 1750 RADV_DEFINE_HANDLE_CASTS(radv_physical_device, VkPhysicalDevice) 1751 RADV_DEFINE_HANDLE_CASTS(radv_queue, VkQueue) 1752 1753 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, VkCommandPool) 1754 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, VkBuffer) 1755 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, VkBufferView) 1756 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, VkDescriptorPool) 1757 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, VkDescriptorSet) 1758 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, VkDescriptorSetLayout) 1759 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplateKHR) 1760 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, VkDeviceMemory) 1761 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_fence, VkFence) 1762 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_event, VkEvent) 1763 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_framebuffer, VkFramebuffer) 1764 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_image, VkImage) 1765 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, VkImageView); 1766 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, VkPipelineCache) 1767 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, VkPipeline) 1768 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, VkPipelineLayout) 1769 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, VkQueryPool) 1770 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass) 1771 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler) 1772 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule) 1773 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_semaphore, VkSemaphore) 1774 1775 #endif /* RADV_PRIVATE_H */ 1776