/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Marek Olšák <maraeo@gmail.com>
 *
 */

/**
 * This file contains common screen and context structures and functions
 * for r600g and radeonsi.
30 */ 31 32 #ifndef R600_PIPE_COMMON_H 33 #define R600_PIPE_COMMON_H 34 35 #include <stdio.h> 36 37 #include "radeon/radeon_winsys.h" 38 39 #include "util/u_blitter.h" 40 #include "util/list.h" 41 #include "util/u_range.h" 42 #include "util/slab.h" 43 #include "util/u_suballoc.h" 44 #include "util/u_transfer.h" 45 46 #define ATI_VENDOR_ID 0x1002 47 48 #define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) 49 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) 50 #define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) 51 #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3) 52 53 #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) 54 /* Pipeline & streamout query controls. */ 55 #define R600_CONTEXT_START_PIPELINE_STATS (1u << 1) 56 #define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2) 57 #define R600_CONTEXT_PRIVATE_FLAG (1u << 3) 58 59 /* special primitive types */ 60 #define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX 61 62 /* Debug flags. */ 63 /* logging */ 64 #define DBG_TEX (1 << 0) 65 /* gap - reuse */ 66 #define DBG_COMPUTE (1 << 2) 67 #define DBG_VM (1 << 3) 68 /* gap - reuse */ 69 /* shader logging */ 70 #define DBG_FS (1 << 5) 71 #define DBG_VS (1 << 6) 72 #define DBG_GS (1 << 7) 73 #define DBG_PS (1 << 8) 74 #define DBG_CS (1 << 9) 75 #define DBG_TCS (1 << 10) 76 #define DBG_TES (1 << 11) 77 #define DBG_NO_IR (1 << 12) 78 #define DBG_NO_TGSI (1 << 13) 79 #define DBG_NO_ASM (1 << 14) 80 #define DBG_PREOPT_IR (1 << 15) 81 #define DBG_CHECK_IR (1 << 16) 82 #define DBG_NO_OPT_VARIANT (1 << 17) 83 /* gaps */ 84 #define DBG_TEST_DMA (1 << 20) 85 /* Bits 21-31 are reserved for the r600g driver. 
*/ 86 /* features */ 87 #define DBG_NO_ASYNC_DMA (1llu << 32) 88 #define DBG_NO_HYPERZ (1llu << 33) 89 #define DBG_NO_DISCARD_RANGE (1llu << 34) 90 #define DBG_NO_2D_TILING (1llu << 35) 91 #define DBG_NO_TILING (1llu << 36) 92 #define DBG_SWITCH_ON_EOP (1llu << 37) 93 #define DBG_FORCE_DMA (1llu << 38) 94 #define DBG_PRECOMPILE (1llu << 39) 95 #define DBG_INFO (1llu << 40) 96 #define DBG_NO_WC (1llu << 41) 97 #define DBG_CHECK_VM (1llu << 42) 98 #define DBG_NO_DCC (1llu << 43) 99 #define DBG_NO_DCC_CLEAR (1llu << 44) 100 #define DBG_NO_RB_PLUS (1llu << 45) 101 #define DBG_SI_SCHED (1llu << 46) 102 #define DBG_MONOLITHIC_SHADERS (1llu << 47) 103 #define DBG_NO_CE (1llu << 48) 104 #define DBG_UNSAFE_MATH (1llu << 49) 105 #define DBG_NO_DCC_FB (1llu << 50) 106 107 #define R600_MAP_BUFFER_ALIGNMENT 64 108 #define R600_MAX_VIEWPORTS 16 109 110 #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024 111 112 enum r600_coherency { 113 R600_COHERENCY_NONE, /* no cache flushes needed */ 114 R600_COHERENCY_SHADER, 115 R600_COHERENCY_CB_META, 116 }; 117 118 #ifdef PIPE_ARCH_BIG_ENDIAN 119 #define R600_BIG_ENDIAN 1 120 #else 121 #define R600_BIG_ENDIAN 0 122 #endif 123 124 struct r600_common_context; 125 struct r600_perfcounters; 126 struct tgsi_shader_info; 127 struct r600_qbo_state; 128 129 struct radeon_shader_reloc { 130 char name[32]; 131 uint64_t offset; 132 }; 133 134 struct radeon_shader_binary { 135 /** Shader code */ 136 unsigned char *code; 137 unsigned code_size; 138 139 /** Config/Context register state that accompanies this shader. 140 * This is a stream of dword pairs. First dword contains the 141 * register address, the second dword contains the value.*/ 142 unsigned char *config; 143 unsigned config_size; 144 145 /** The number of bytes of config information for each global symbol. 146 */ 147 unsigned config_size_per_symbol; 148 149 /** Constant data accessed by the shader. This will be uploaded 150 * into a constant buffer. 
*/ 151 unsigned char *rodata; 152 unsigned rodata_size; 153 154 /** List of symbol offsets for the shader */ 155 uint64_t *global_symbol_offsets; 156 unsigned global_symbol_count; 157 158 struct radeon_shader_reloc *relocs; 159 unsigned reloc_count; 160 161 /** Disassembled shader in a string. */ 162 char *disasm_string; 163 char *llvm_ir_string; 164 }; 165 166 void radeon_shader_binary_init(struct radeon_shader_binary *b); 167 void radeon_shader_binary_clean(struct radeon_shader_binary *b); 168 169 /* Only 32-bit buffer allocations are supported, gallium doesn't support more 170 * at the moment. 171 */ 172 struct r600_resource { 173 struct u_resource b; 174 175 /* Winsys objects. */ 176 struct pb_buffer *buf; 177 uint64_t gpu_address; 178 /* Memory usage if the buffer placement is optimal. */ 179 uint64_t vram_usage; 180 uint64_t gart_usage; 181 182 /* Resource properties. */ 183 uint64_t bo_size; 184 unsigned bo_alignment; 185 enum radeon_bo_domain domains; 186 enum radeon_bo_flag flags; 187 unsigned bind_history; 188 189 /* The buffer range which is initialized (with a write transfer, 190 * streamout, DMA, or as a random access target). The rest of 191 * the buffer is considered invalid and can be mapped unsynchronized. 192 * 193 * This allows unsychronized mapping of a buffer range which hasn't 194 * been used yet. It's for applications which forget to use 195 * the unsynchronized map flag and expect the driver to figure it out. 196 */ 197 struct util_range valid_buffer_range; 198 199 /* For buffers only. This indicates that a write operation has been 200 * performed by TC L2, but the cache hasn't been flushed. 201 * Any hw block which doesn't use or bypasses TC L2 should check this 202 * flag and flush the cache before using the buffer. 203 * 204 * For example, TC L2 must be flushed if a buffer which has been 205 * modified by a shader store instruction is about to be used as 206 * an index buffer. 
The reason is that VGT DMA index fetching doesn't 207 * use TC L2. 208 */ 209 bool TC_L2_dirty; 210 211 /* Whether the resource has been exported via resource_get_handle. */ 212 bool is_shared; 213 unsigned external_usage; /* PIPE_HANDLE_USAGE_* */ 214 }; 215 216 struct r600_transfer { 217 struct pipe_transfer transfer; 218 struct r600_resource *staging; 219 unsigned offset; 220 }; 221 222 struct r600_fmask_info { 223 uint64_t offset; 224 uint64_t size; 225 unsigned alignment; 226 unsigned pitch_in_pixels; 227 unsigned bank_height; 228 unsigned slice_tile_max; 229 unsigned tile_mode_index; 230 }; 231 232 struct r600_cmask_info { 233 uint64_t offset; 234 uint64_t size; 235 unsigned alignment; 236 unsigned slice_tile_max; 237 unsigned base_address_reg; 238 }; 239 240 struct r600_texture { 241 struct r600_resource resource; 242 243 uint64_t size; 244 unsigned num_level0_transfers; 245 enum pipe_format db_render_format; 246 bool is_depth; 247 bool db_compatible; 248 bool can_sample_z; 249 bool can_sample_s; 250 unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */ 251 unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */ 252 struct r600_texture *flushed_depth_texture; 253 struct radeon_surf surface; 254 255 /* Colorbuffer compression and fast clear. */ 256 struct r600_fmask_info fmask; 257 struct r600_cmask_info cmask; 258 struct r600_resource *cmask_buffer; 259 uint64_t dcc_offset; /* 0 = disabled */ 260 unsigned cb_color_info; /* fast clear enable bit */ 261 unsigned color_clear_value[2]; 262 unsigned last_msaa_resolve_target_micro_mode; 263 264 /* Depth buffer compression and fast clear. 
*/ 265 struct r600_resource *htile_buffer; 266 bool tc_compatible_htile; 267 bool depth_cleared; /* if it was cleared at least once */ 268 float depth_clear_value; 269 bool stencil_cleared; /* if it was cleared at least once */ 270 uint8_t stencil_clear_value; 271 272 bool non_disp_tiling; /* R600-Cayman only */ 273 274 /* Whether the texture is a displayable back buffer and needs DCC 275 * decompression, which is expensive. Therefore, it's enabled only 276 * if statistics suggest that it will pay off and it's allocated 277 * separately. It can't be bound as a sampler by apps. Limited to 278 * target == 2D and last_level == 0. If enabled, dcc_offset contains 279 * the absolute GPUVM address, not the relative one. 280 */ 281 struct r600_resource *dcc_separate_buffer; 282 /* When DCC is temporarily disabled, the separate buffer is here. */ 283 struct r600_resource *last_dcc_separate_buffer; 284 /* We need to track DCC dirtiness, because st/dri usually calls 285 * flush_resource twice per frame (not a bug) and we don't wanna 286 * decompress DCC twice. Also, the dirty tracking must be done even 287 * if DCC isn't used, because it's required by the DCC usage analysis 288 * for a possible future enablement. 289 */ 290 bool separate_dcc_dirty; 291 /* Statistics gathering for the DCC enablement heuristic. */ 292 bool dcc_gather_statistics; 293 /* Estimate of how much this color buffer is written to in units of 294 * full-screen draws: ps_invocations / (width * height) 295 * Shader kills, late Z, and blending with trivial discards make it 296 * inaccurate (we need to count CB updates, not PS invocations). 297 */ 298 unsigned ps_draw_ratio; 299 /* The number of clears since the last DCC usage analysis. */ 300 unsigned num_slow_clears; 301 302 /* Counter that should be non-zero if the texture is bound to a 303 * framebuffer. Implemented in radeonsi only. 
304 */ 305 uint32_t framebuffers_bound; 306 }; 307 308 struct r600_surface { 309 struct pipe_surface base; 310 311 bool color_initialized; 312 bool depth_initialized; 313 314 /* Misc. color flags. */ 315 bool alphatest_bypass; 316 bool export_16bpc; 317 bool color_is_int8; 318 bool color_is_int10; 319 320 /* Color registers. */ 321 unsigned cb_color_info; 322 unsigned cb_color_base; 323 unsigned cb_color_view; 324 unsigned cb_color_size; /* R600 only */ 325 unsigned cb_color_dim; /* EG only */ 326 unsigned cb_color_pitch; /* EG and later */ 327 unsigned cb_color_slice; /* EG and later */ 328 unsigned cb_color_attrib; /* EG and later */ 329 unsigned cb_dcc_control; /* VI and later */ 330 unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ 331 unsigned cb_color_fmask_slice; /* EG and later */ 332 unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ 333 unsigned cb_color_mask; /* R600 only */ 334 unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */ 335 unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */ 336 unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */ 337 unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */ 338 struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */ 339 struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */ 340 341 /* DB registers. 
*/ 342 unsigned db_depth_info; /* R600 only, then SI and later */ 343 unsigned db_z_info; /* EG and later */ 344 unsigned db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */ 345 unsigned db_depth_view; 346 unsigned db_depth_size; 347 unsigned db_depth_slice; /* EG and later */ 348 unsigned db_stencil_base; /* EG and later */ 349 unsigned db_stencil_info; /* EG and later */ 350 unsigned db_prefetch_limit; /* R600 only */ 351 unsigned db_htile_surface; 352 unsigned db_htile_data_base; 353 unsigned db_preload_control; /* EG and later */ 354 }; 355 356 union r600_grbm_counters { 357 struct { 358 unsigned spi_busy; 359 unsigned spi_idle; 360 unsigned gui_busy; 361 unsigned gui_idle; 362 } named; 363 unsigned array[0]; 364 }; 365 366 struct r600_common_screen { 367 struct pipe_screen b; 368 struct radeon_winsys *ws; 369 enum radeon_family family; 370 enum chip_class chip_class; 371 struct radeon_info info; 372 uint64_t debug_flags; 373 bool has_cp_dma; 374 bool has_streamout; 375 376 struct slab_parent_pool pool_transfers; 377 378 /* Texture filter settings. */ 379 int force_aniso; /* -1 = disabled */ 380 381 /* Auxiliary context. Mainly used to initialize resources. 382 * It must be locked prior to using and flushed before unlocking. */ 383 struct pipe_context *aux_context; 384 pipe_mutex aux_context_lock; 385 386 /* This must be in the screen, because UE4 uses one context for 387 * compilation and another one for rendering. 388 */ 389 unsigned num_compilations; 390 /* Along with ST_DEBUG=precompile, this should show if applications 391 * are loading shaders on demand. This is a monotonic counter. 392 */ 393 unsigned num_shaders_created; 394 unsigned num_shader_cache_hits; 395 396 /* GPU load thread. */ 397 pipe_mutex gpu_load_mutex; 398 pipe_thread gpu_load_thread; 399 union r600_grbm_counters grbm_counters; 400 volatile unsigned gpu_load_stop_thread; /* bool */ 401 402 char renderer_string[100]; 403 404 /* Performance counters. 
*/ 405 struct r600_perfcounters *perfcounters; 406 407 /* If pipe_screen wants to re-emit the framebuffer state of all 408 * contexts, it should atomically increment this. Each context will 409 * compare this with its own last known value of the counter before 410 * drawing and re-emit the framebuffer state accordingly. 411 */ 412 unsigned dirty_fb_counter; 413 414 /* Atomically increment this counter when an existing texture's 415 * metadata is enabled or disabled in a way that requires changing 416 * contexts' compressed texture binding masks. 417 */ 418 unsigned compressed_colortex_counter; 419 420 /* Atomically increment this counter when an existing texture's 421 * backing buffer or tile mode parameters have changed that requires 422 * recomputation of shader descriptors. 423 */ 424 unsigned dirty_tex_descriptor_counter; 425 426 struct { 427 /* Context flags to set so that all writes from earlier jobs 428 * in the CP are seen by L2 clients. 429 */ 430 unsigned cp_to_L2; 431 432 /* Context flags to set so that all writes from earlier 433 * compute jobs are seen by L2 clients. 434 */ 435 unsigned compute_to_L2; 436 } barrier_flags; 437 438 void (*query_opaque_metadata)(struct r600_common_screen *rscreen, 439 struct r600_texture *rtex, 440 struct radeon_bo_metadata *md); 441 442 void (*apply_opaque_metadata)(struct r600_common_screen *rscreen, 443 struct r600_texture *rtex, 444 struct radeon_bo_metadata *md); 445 }; 446 447 /* This encapsulates a state or an operation which can emitted into the GPU 448 * command stream. */ 449 struct r600_atom { 450 void (*emit)(struct r600_common_context *ctx, struct r600_atom *state); 451 unsigned num_dw; 452 unsigned short id; 453 }; 454 455 struct r600_so_target { 456 struct pipe_stream_output_target b; 457 458 /* The buffer where BUFFER_FILLED_SIZE is stored. 
*/ 459 struct r600_resource *buf_filled_size; 460 unsigned buf_filled_size_offset; 461 bool buf_filled_size_valid; 462 463 unsigned stride_in_dw; 464 }; 465 466 struct r600_streamout { 467 struct r600_atom begin_atom; 468 bool begin_emitted; 469 unsigned num_dw_for_end; 470 471 unsigned enabled_mask; 472 unsigned num_targets; 473 struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS]; 474 475 unsigned append_bitmask; 476 bool suspended; 477 478 /* External state which comes from the vertex shader, 479 * it must be set explicitly when binding a shader. */ 480 unsigned *stride_in_dw; 481 unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */ 482 483 /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */ 484 unsigned hw_enabled_mask; 485 486 /* The state of VGT_STRMOUT_(CONFIG|EN). */ 487 struct r600_atom enable_atom; 488 bool streamout_enabled; 489 bool prims_gen_query_enabled; 490 int num_prims_gen_queries; 491 }; 492 493 struct r600_signed_scissor { 494 int minx; 495 int miny; 496 int maxx; 497 int maxy; 498 }; 499 500 struct r600_scissors { 501 struct r600_atom atom; 502 unsigned dirty_mask; 503 struct pipe_scissor_state states[R600_MAX_VIEWPORTS]; 504 }; 505 506 struct r600_viewports { 507 struct r600_atom atom; 508 unsigned dirty_mask; 509 unsigned depth_range_dirty_mask; 510 struct pipe_viewport_state states[R600_MAX_VIEWPORTS]; 511 struct r600_signed_scissor as_scissor[R600_MAX_VIEWPORTS]; 512 }; 513 514 struct r600_ring { 515 struct radeon_winsys_cs *cs; 516 void (*flush)(void *ctx, unsigned flags, 517 struct pipe_fence_handle **fence); 518 }; 519 520 /* Saved CS data for debugging features. 
*/ 521 struct radeon_saved_cs { 522 uint32_t *ib; 523 unsigned num_dw; 524 525 struct radeon_bo_list_item *bo_list; 526 unsigned bo_count; 527 }; 528 529 struct r600_common_context { 530 struct pipe_context b; /* base class */ 531 532 struct r600_common_screen *screen; 533 struct radeon_winsys *ws; 534 struct radeon_winsys_ctx *ctx; 535 enum radeon_family family; 536 enum chip_class chip_class; 537 struct r600_ring gfx; 538 struct r600_ring dma; 539 struct pipe_fence_handle *last_gfx_fence; 540 struct pipe_fence_handle *last_sdma_fence; 541 unsigned num_gfx_cs_flushes; 542 unsigned initial_gfx_cs_size; 543 unsigned gpu_reset_counter; 544 unsigned last_dirty_fb_counter; 545 unsigned last_compressed_colortex_counter; 546 unsigned last_dirty_tex_descriptor_counter; 547 548 struct u_upload_mgr *uploader; 549 struct u_suballocator *allocator_zeroed_memory; 550 struct slab_child_pool pool_transfers; 551 552 /* Current unaccounted memory usage. */ 553 uint64_t vram; 554 uint64_t gtt; 555 556 /* States. */ 557 struct r600_streamout streamout; 558 struct r600_scissors scissors; 559 struct r600_viewports viewports; 560 bool scissor_enabled; 561 bool clip_halfz; 562 bool vs_writes_viewport_index; 563 bool vs_disables_clipping_viewport; 564 565 /* Additional context states. */ 566 unsigned flags; /* flush flags */ 567 568 /* Queries. */ 569 /* Maintain the list of active queries for pausing between IBs. */ 570 int num_occlusion_queries; 571 int num_perfect_occlusion_queries; 572 struct list_head active_queries; 573 unsigned num_cs_dw_queries_suspend; 574 /* Additional hardware info. */ 575 unsigned backend_mask; 576 unsigned max_db; /* for OQ */ 577 /* Misc stats. 
*/ 578 unsigned num_draw_calls; 579 unsigned num_spill_draw_calls; 580 unsigned num_compute_calls; 581 unsigned num_spill_compute_calls; 582 unsigned num_dma_calls; 583 unsigned num_cp_dma_calls; 584 unsigned num_vs_flushes; 585 unsigned num_ps_flushes; 586 unsigned num_cs_flushes; 587 unsigned num_fb_cache_flushes; 588 unsigned num_L2_invalidates; 589 unsigned num_L2_writebacks; 590 uint64_t num_alloc_tex_transfer_bytes; 591 unsigned last_tex_ps_draw_ratio; /* for query */ 592 593 /* Render condition. */ 594 struct r600_atom render_cond_atom; 595 struct pipe_query *render_cond; 596 unsigned render_cond_mode; 597 bool render_cond_invert; 598 bool render_cond_force_off; /* for u_blitter */ 599 600 /* MSAA sample locations. 601 * The first index is the sample index. 602 * The second index is the coordinate: X, Y. */ 603 float sample_locations_1x[1][2]; 604 float sample_locations_2x[2][2]; 605 float sample_locations_4x[4][2]; 606 float sample_locations_8x[8][2]; 607 float sample_locations_16x[16][2]; 608 609 /* Statistics gathering for the DCC enablement heuristic. It can't be 610 * in r600_texture because r600_texture can be shared by multiple 611 * contexts. This is for back buffers only. We shouldn't get too many 612 * of those. 613 * 614 * X11 DRI3 rotates among a finite set of back buffers. They should 615 * all fit in this array. If they don't, separate DCC might never be 616 * enabled by DCC stat gathering. 617 */ 618 struct { 619 struct r600_texture *tex; 620 /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */ 621 struct pipe_query *ps_stats[3]; 622 /* If all slots are used and another slot is needed, 623 * the least recently used slot is evicted based on this. */ 624 int64_t last_use_timestamp; 625 bool query_active; 626 } dcc_stats[5]; 627 628 struct pipe_debug_callback debug; 629 struct pipe_device_reset_callback device_reset_callback; 630 631 void *query_result_shader; 632 633 /* Copy one resource to another using async DMA. 
*/ 634 void (*dma_copy)(struct pipe_context *ctx, 635 struct pipe_resource *dst, 636 unsigned dst_level, 637 unsigned dst_x, unsigned dst_y, unsigned dst_z, 638 struct pipe_resource *src, 639 unsigned src_level, 640 const struct pipe_box *src_box); 641 642 void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, 643 uint64_t offset, uint64_t size, unsigned value); 644 645 void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, 646 uint64_t offset, uint64_t size, unsigned value, 647 enum r600_coherency coher); 648 649 void (*blit_decompress_depth)(struct pipe_context *ctx, 650 struct r600_texture *texture, 651 struct r600_texture *staging, 652 unsigned first_level, unsigned last_level, 653 unsigned first_layer, unsigned last_layer, 654 unsigned first_sample, unsigned last_sample); 655 656 void (*decompress_dcc)(struct pipe_context *ctx, 657 struct r600_texture *rtex); 658 659 /* Reallocate the buffer and update all resource bindings where 660 * the buffer is bound, including all resource descriptors. */ 661 void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf); 662 663 /* Enable or disable occlusion queries. */ 664 void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable); 665 666 void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st); 667 668 /* This ensures there is enough space in the command stream. 
*/ 669 void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw, 670 bool include_draw_vbo); 671 672 void (*set_atom_dirty)(struct r600_common_context *ctx, 673 struct r600_atom *atom, bool dirty); 674 675 void (*check_vm_faults)(struct r600_common_context *ctx, 676 struct radeon_saved_cs *saved, 677 enum ring_type ring); 678 }; 679 680 /* r600_buffer.c */ 681 bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx, 682 struct pb_buffer *buf, 683 enum radeon_bo_usage usage); 684 void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, 685 struct r600_resource *resource, 686 unsigned usage); 687 void r600_buffer_subdata(struct pipe_context *ctx, 688 struct pipe_resource *buffer, 689 unsigned usage, unsigned offset, 690 unsigned size, const void *data); 691 void r600_init_resource_fields(struct r600_common_screen *rscreen, 692 struct r600_resource *res, 693 uint64_t size, unsigned alignment); 694 bool r600_alloc_resource(struct r600_common_screen *rscreen, 695 struct r600_resource *res); 696 struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, 697 const struct pipe_resource *templ, 698 unsigned alignment); 699 struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen, 700 unsigned bind, 701 unsigned usage, 702 unsigned size, 703 unsigned alignment); 704 struct pipe_resource * 705 r600_buffer_from_user_memory(struct pipe_screen *screen, 706 const struct pipe_resource *templ, 707 void *user_memory); 708 void 709 r600_invalidate_resource(struct pipe_context *ctx, 710 struct pipe_resource *resource); 711 712 /* r600_common_pipe.c */ 713 void r600_gfx_write_event_eop(struct r600_common_context *ctx, 714 unsigned event, unsigned event_flags, 715 unsigned data_sel, 716 struct r600_resource *buf, uint64_t va, 717 uint32_t old_fence, uint32_t new_fence); 718 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen); 719 void r600_gfx_wait_fence(struct r600_common_context *ctx, 720 uint64_t 
va, uint32_t ref, uint32_t mask); 721 void r600_draw_rectangle(struct blitter_context *blitter, 722 int x1, int y1, int x2, int y2, float depth, 723 enum blitter_attrib_type type, 724 const union pipe_color_union *attrib); 725 bool r600_common_screen_init(struct r600_common_screen *rscreen, 726 struct radeon_winsys *ws); 727 void r600_destroy_common_screen(struct r600_common_screen *rscreen); 728 void r600_preflush_suspend_features(struct r600_common_context *ctx); 729 void r600_postflush_resume_features(struct r600_common_context *ctx); 730 bool r600_common_context_init(struct r600_common_context *rctx, 731 struct r600_common_screen *rscreen, 732 unsigned context_flags); 733 void r600_common_context_cleanup(struct r600_common_context *rctx); 734 bool r600_can_dump_shader(struct r600_common_screen *rscreen, 735 unsigned processor); 736 bool r600_extra_shader_checks(struct r600_common_screen *rscreen, 737 unsigned processor); 738 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, 739 uint64_t offset, uint64_t size, unsigned value); 740 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, 741 const struct pipe_resource *templ); 742 const char *r600_get_llvm_processor_name(enum radeon_family family); 743 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, 744 struct r600_resource *dst, struct r600_resource *src); 745 void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, 746 struct radeon_saved_cs *saved); 747 void radeon_clear_saved_cs(struct radeon_saved_cs *saved); 748 bool r600_check_device_reset(struct r600_common_context *rctx); 749 750 /* r600_gpu_load.c */ 751 void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen); 752 uint64_t r600_begin_counter_spi(struct r600_common_screen *rscreen); 753 unsigned r600_end_counter_spi(struct r600_common_screen *rscreen, uint64_t begin); 754 uint64_t r600_begin_counter_gui(struct r600_common_screen 
*rscreen); 755 unsigned r600_end_counter_gui(struct r600_common_screen *rscreen, uint64_t begin); 756 757 /* r600_perfcounters.c */ 758 void r600_perfcounters_destroy(struct r600_common_screen *rscreen); 759 760 /* r600_query.c */ 761 void r600_init_screen_query_functions(struct r600_common_screen *rscreen); 762 void r600_query_init(struct r600_common_context *rctx); 763 void r600_suspend_queries(struct r600_common_context *ctx); 764 void r600_resume_queries(struct r600_common_context *ctx); 765 void r600_query_init_backend_mask(struct r600_common_context *ctx); 766 767 /* r600_streamout.c */ 768 void r600_streamout_buffers_dirty(struct r600_common_context *rctx); 769 void r600_set_streamout_targets(struct pipe_context *ctx, 770 unsigned num_targets, 771 struct pipe_stream_output_target **targets, 772 const unsigned *offset); 773 void r600_emit_streamout_end(struct r600_common_context *rctx); 774 void r600_update_prims_generated_query_state(struct r600_common_context *rctx, 775 unsigned type, int diff); 776 void r600_streamout_init(struct r600_common_context *rctx); 777 778 /* r600_test_dma.c */ 779 void r600_test_dma(struct r600_common_screen *rscreen); 780 781 /* r600_texture.c */ 782 bool r600_prepare_for_dma_blit(struct r600_common_context *rctx, 783 struct r600_texture *rdst, 784 unsigned dst_level, unsigned dstx, 785 unsigned dsty, unsigned dstz, 786 struct r600_texture *rsrc, 787 unsigned src_level, 788 const struct pipe_box *src_box); 789 void r600_texture_get_fmask_info(struct r600_common_screen *rscreen, 790 struct r600_texture *rtex, 791 unsigned nr_samples, 792 struct r600_fmask_info *out); 793 void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, 794 struct r600_texture *rtex, 795 struct r600_cmask_info *out); 796 bool r600_init_flushed_depth_texture(struct pipe_context *ctx, 797 struct pipe_resource *texture, 798 struct r600_texture **staging); 799 void r600_print_texture_info(struct r600_texture *rtex, FILE *f); 800 struct 
pipe_resource *r600_texture_create(struct pipe_screen *screen, 801 const struct pipe_resource *templ); 802 bool vi_dcc_formats_compatible(enum pipe_format format1, 803 enum pipe_format format2); 804 void vi_dcc_disable_if_incompatible_format(struct r600_common_context *rctx, 805 struct pipe_resource *tex, 806 unsigned level, 807 enum pipe_format view_format); 808 struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, 809 struct pipe_resource *texture, 810 const struct pipe_surface *templ, 811 unsigned width, unsigned height); 812 unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap); 813 void vi_separate_dcc_start_query(struct pipe_context *ctx, 814 struct r600_texture *tex); 815 void vi_separate_dcc_stop_query(struct pipe_context *ctx, 816 struct r600_texture *tex); 817 void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, 818 struct r600_texture *tex); 819 void vi_dcc_clear_level(struct r600_common_context *rctx, 820 struct r600_texture *rtex, 821 unsigned level, unsigned clear_value); 822 void evergreen_do_fast_color_clear(struct r600_common_context *rctx, 823 struct pipe_framebuffer_state *fb, 824 struct r600_atom *fb_state, 825 unsigned *buffers, unsigned *dirty_cbufs, 826 const union pipe_color_union *color); 827 bool r600_texture_disable_dcc(struct r600_common_context *rctx, 828 struct r600_texture *rtex); 829 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen); 830 void r600_init_context_texture_functions(struct r600_common_context *rctx); 831 832 /* r600_viewport.c */ 833 void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx, 834 struct pipe_scissor_state *scissor); 835 void r600_viewport_set_rast_deps(struct r600_common_context *rctx, 836 bool scissor_enable, bool clip_halfz); 837 void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx, 838 struct tgsi_shader_info *info); 839 void r600_init_viewport_functions(struct 
r600_common_context *rctx); 840 841 /* cayman_msaa.c */ 842 extern const uint32_t eg_sample_locs_2x[4]; 843 extern const unsigned eg_max_dist_2x; 844 extern const uint32_t eg_sample_locs_4x[4]; 845 extern const unsigned eg_max_dist_4x; 846 void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count, 847 unsigned sample_index, float *out_value); 848 void cayman_init_msaa(struct pipe_context *ctx); 849 void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples); 850 void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, 851 int ps_iter_samples, int overrast_samples, 852 unsigned sc_mode_cntl_1); 853 854 855 /* Inline helpers. */ 856 857 static inline struct r600_resource *r600_resource(struct pipe_resource *r) 858 { 859 return (struct r600_resource*)r; 860 } 861 862 static inline void 863 r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res) 864 { 865 pipe_resource_reference((struct pipe_resource **)ptr, 866 (struct pipe_resource *)res); 867 } 868 869 static inline void 870 r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res) 871 { 872 pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b); 873 } 874 875 static inline void 876 r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r) 877 { 878 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 879 struct r600_resource *res = (struct r600_resource *)r; 880 881 if (res) { 882 /* Add memory usage for need_gfx_cs_space */ 883 rctx->vram += res->vram_usage; 884 rctx->gtt += res->gart_usage; 885 } 886 } 887 888 static inline bool r600_get_strmout_en(struct r600_common_context *rctx) 889 { 890 return rctx->streamout.streamout_enabled || 891 rctx->streamout.prims_gen_query_enabled; 892 } 893 894 #define SQ_TEX_XY_FILTER_POINT 0x00 895 #define SQ_TEX_XY_FILTER_BILINEAR 0x01 896 #define SQ_TEX_XY_FILTER_ANISO_POINT 0x02 897 #define 
SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03 898 899 static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso) 900 { 901 if (filter == PIPE_TEX_FILTER_LINEAR) 902 return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR 903 : SQ_TEX_XY_FILTER_BILINEAR; 904 else 905 return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT 906 : SQ_TEX_XY_FILTER_POINT; 907 } 908 909 static inline unsigned r600_tex_aniso_filter(unsigned filter) 910 { 911 if (filter < 2) 912 return 0; 913 if (filter < 4) 914 return 1; 915 if (filter < 8) 916 return 2; 917 if (filter < 16) 918 return 3; 919 return 4; 920 } 921 922 static inline unsigned r600_wavefront_size(enum radeon_family family) 923 { 924 switch (family) { 925 case CHIP_RV610: 926 case CHIP_RS780: 927 case CHIP_RV620: 928 case CHIP_RS880: 929 return 16; 930 case CHIP_RV630: 931 case CHIP_RV635: 932 case CHIP_RV730: 933 case CHIP_RV710: 934 case CHIP_PALM: 935 case CHIP_CEDAR: 936 return 32; 937 default: 938 return 64; 939 } 940 } 941 942 static inline enum radeon_bo_priority 943 r600_get_sampler_view_priority(struct r600_resource *res) 944 { 945 if (res->b.b.target == PIPE_BUFFER) 946 return RADEON_PRIO_SAMPLER_BUFFER; 947 948 if (res->b.b.nr_samples > 1) 949 return RADEON_PRIO_SAMPLER_TEXTURE_MSAA; 950 951 return RADEON_PRIO_SAMPLER_TEXTURE; 952 } 953 954 static inline bool 955 r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler) 956 { 957 return (stencil_sampler && tex->can_sample_s) || 958 (!stencil_sampler && tex->can_sample_z); 959 } 960 961 #define COMPUTE_DBG(rscreen, fmt, args...) \ 962 do { \ 963 if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \ 964 } while (0); 965 966 #define R600_ERR(fmt, args...) \ 967 fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args) 968 969 /* For MSAA sample positions. 
*/ 970 #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ 971 (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \ 972 (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \ 973 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \ 974 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28)) 975 976 #endif 977