/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Jerome Glisse
 *      Corbin Simpson
 */
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "r600_query.h"
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/u_surface.h"
#include "os/os_time.h"
#include <errno.h>
#include <inttypes.h>

static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
				       struct r600_texture *rtex);
static enum radeon_surf_mode
r600_choose_tiling(struct r600_common_screen *rscreen,
		   const struct pipe_resource *templ);


bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
			       struct r600_texture *rdst,
			       unsigned dst_level, unsigned dstx,
			       unsigned dsty, unsigned dstz,
			       struct r600_texture *rsrc,
			       unsigned src_level,
			       const struct pipe_box *src_box)
{
	if (!rctx->dma.cs)
		return false;

	if (rdst->surface.bpe != rsrc->surface.bpe)
		return false;

	/* MSAA: Blits don't exist in the real world. */
	if (rsrc->resource.b.b.nr_samples > 1 ||
	    rdst->resource.b.b.nr_samples > 1)
		return false;

	/* Depth-stencil surfaces:
	 *   When dst is linear, the DB->CB copy preserves HTILE.
	 *   When dst is tiled, the 3D path must be used to update HTILE.
	 */
	if (rsrc->is_depth || rdst->is_depth)
		return false;

	/* DCC as:
	 *   src: Use the 3D path. DCC decompression is expensive.
	 *   dst: Use the 3D path to compress the pixels with DCC.
	 */
	if ((rsrc->dcc_offset && src_level < rsrc->surface.num_dcc_levels) ||
	    (rdst->dcc_offset && dst_level < rdst->surface.num_dcc_levels))
		return false;

	/* CMASK as:
	 *   src: Both texture and SDMA paths need decompression. Use SDMA.
	 *   dst: If overwriting the whole texture, discard CMASK and use
	 *        SDMA. Otherwise, use the 3D path.
	 */
	if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
		/* The CMASK clear is only enabled for the first level. */
		assert(dst_level == 0);
		if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level,
						      dstx, dsty, dstz, src_box->width,
						      src_box->height, src_box->depth))
			return false;

		r600_texture_discard_cmask(rctx->screen, rdst);
	}

	/* All requirements are met. Prepare textures for SDMA. */
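	/* Flushing the source here decompresses its fast clear, so SDMA
	 * reads plain pixels (descriptive note, not in the original). */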
	if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level))
		rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b);

	assert(!(rsrc->dirty_level_mask & (1 << src_level)));
	assert(!(rdst->dirty_level_mask & (1 << dst_level)));

	return true;
}

/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
static void r600_copy_region_with_blit(struct pipe_context *pipe,
				       struct pipe_resource *dst,
				       unsigned dst_level,
				       unsigned dstx, unsigned dsty, unsigned dstz,
				       struct pipe_resource *src,
				       unsigned src_level,
				       const struct pipe_box *src_box)
{
	struct pipe_blit_info blit;

	memset(&blit, 0, sizeof(blit));
	blit.src.resource = src;
	blit.src.format = src->format;
	blit.src.level = src_level;
	blit.src.box = *src_box;
	blit.dst.resource = dst;
	blit.dst.format = dst->format;
	blit.dst.level = dst_level;
	blit.dst.box.x = dstx;
	blit.dst.box.y = dsty;
	blit.dst.box.z = dstz;
	blit.dst.box.width = src_box->width;
	blit.dst.box.height = src_box->height;
	blit.dst.box.depth = src_box->depth;
	blit.mask = util_format_get_mask(src->format) &
		    util_format_get_mask(dst->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

	if (blit.mask) {
		pipe->blit(pipe, &blit);
	}
}

/* Copy from a full GPU texture to a transfer's staging one. */
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
	struct pipe_resource *dst = &rtransfer->staging->b.b;
	struct pipe_resource *src = transfer->resource;

	if (src->nr_samples > 1) {
		r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
					   src, transfer->level, &transfer->box);
		return;
	}

	rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
		       &transfer->box);
}

/* Copy from a transfer's staging texture to a full GPU one. */
static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
	struct pipe_resource *dst = transfer->resource;
	struct pipe_resource *src = &rtransfer->staging->b.b;
	struct pipe_box sbox;

	u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);

	if (dst->nr_samples > 1) {
		r600_copy_region_with_blit(ctx, dst, transfer->level,
					   transfer->box.x, transfer->box.y, transfer->box.z,
					   src, 0, &sbox);
		return;
	}

	rctx->dma_copy(ctx, dst, transfer->level,
		       transfer->box.x, transfer->box.y, transfer->box.z,
		       src, 0, &sbox);
}

static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
					const struct pipe_box *box)
{
	return rtex->surface.level[level].offset +
	       box->z * rtex->surface.level[level].slice_size +
	       (box->y / rtex->surface.blk_h *
		rtex->surface.level[level].nblk_x +
		box->x / rtex->surface.blk_w) * rtex->surface.bpe;
}

static int r600_init_surface(struct r600_common_screen *rscreen,
			     struct radeon_surf *surface,
			     const struct pipe_resource *ptex,
			     enum radeon_surf_mode array_mode,
			     unsigned pitch_in_bytes_override,
			     unsigned offset,
			     bool is_imported,
			     bool is_scanout,
			     bool is_flushed_depth,
			     bool tc_compatible_htile)
{
	const struct util_format_description *desc =
		util_format_description(ptex->format);
	bool is_depth, is_stencil;
	int r;
	unsigned i, bpe, flags = 0;

	is_depth = util_format_has_depth(desc);
	is_stencil = util_format_has_stencil(desc);

	if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
	    ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
		bpe = 4; /* stencil is allocated separately on evergreen */
	} else {
		bpe = util_format_get_blocksize(ptex->format);
		/* align byte per element on dword */
		if (bpe == 3) {
			bpe = 4;
		}
	}

	if (!is_flushed_depth && is_depth) {
		flags |= RADEON_SURF_ZBUFFER;

		if (tc_compatible_htile &&
		    array_mode == RADEON_SURF_MODE_2D) {
			/* TC-compatible HTILE only supports Z32_FLOAT.
			 * Promote Z16 to Z32. DB->CB copies will convert
			 * the format for transfers.
			 */
			bpe = 4;
			flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
		}

		if (is_stencil)
			flags |= RADEON_SURF_SBUFFER;
	}

	if (rscreen->chip_class >= VI &&
	    (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
	     ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT))
		flags |= RADEON_SURF_DISABLE_DCC;

	if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) {
		/* This should catch bugs in gallium users setting incorrect flags. */
		assert(ptex->nr_samples <= 1 &&
		       ptex->array_size == 1 &&
		       ptex->depth0 == 1 &&
		       ptex->last_level == 0 &&
		       !(flags & RADEON_SURF_Z_OR_SBUFFER));

		flags |= RADEON_SURF_SCANOUT;
	}

	if (is_imported)
		flags |= RADEON_SURF_IMPORTED;
	if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
		flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;

	r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe,
				      array_mode, surface);
	if (r) {
		return r;
	}

	if (pitch_in_bytes_override &&
	    pitch_in_bytes_override != surface->level[0].nblk_x * bpe) {
		/* old ddx on evergreen overestimates alignment for 1d, only 1 level
		 * for those
		 */
		surface->level[0].nblk_x = pitch_in_bytes_override / bpe;
		surface->level[0].slice_size = pitch_in_bytes_override * surface->level[0].nblk_y;
	}

	if (offset) {
		for (i = 0; i < ARRAY_SIZE(surface->level); ++i)
			surface->level[i].offset += offset;
	}
	return 0;
}

static void r600_texture_init_metadata(struct r600_texture *rtex,
				       struct radeon_bo_metadata *metadata)
{
	struct radeon_surf *surface = &rtex->surface;

	memset(metadata, 0, sizeof(*metadata));
	metadata->microtile = surface->level[0].mode >= RADEON_SURF_MODE_1D ?
				   RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
	metadata->macrotile = surface->level[0].mode >= RADEON_SURF_MODE_2D ?
				   RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
	metadata->pipe_config = surface->pipe_config;
	metadata->bankw = surface->bankw;
	metadata->bankh = surface->bankh;
	metadata->tile_split = surface->tile_split;
	metadata->mtilea = surface->mtilea;
	metadata->num_banks = surface->num_banks;
	metadata->stride = surface->level[0].nblk_x * surface->bpe;
	metadata->scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
}

static void r600_dirty_all_framebuffer_states(struct r600_common_screen *rscreen)
{
	p_atomic_inc(&rscreen->dirty_fb_counter);
}

static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
					    struct r600_texture *rtex)
{
	struct r600_common_screen *rscreen = rctx->screen;
	struct pipe_context *ctx = &rctx->b;

	if (ctx == rscreen->aux_context)
		pipe_mutex_lock(rscreen->aux_context_lock);

	ctx->flush_resource(ctx, &rtex->resource.b.b);
	ctx->flush(ctx, NULL, 0);

	if (ctx == rscreen->aux_context)
		pipe_mutex_unlock(rscreen->aux_context_lock);
}

static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
				       struct r600_texture *rtex)
{
	if (!rtex->cmask.size)
		return;

	assert(rtex->resource.b.b.nr_samples <= 1);

	/* Disable CMASK. */
	memset(&rtex->cmask, 0, sizeof(rtex->cmask));
	rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
	rtex->dirty_level_mask = 0;

	if (rscreen->chip_class >= SI)
		rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
	else
		rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);

	if (rtex->cmask_buffer != &rtex->resource)
		r600_resource_reference(&rtex->cmask_buffer, NULL);

	/* Notify all contexts about the change. */
	r600_dirty_all_framebuffer_states(rscreen);
	p_atomic_inc(&rscreen->compressed_colortex_counter);
}

static bool r600_can_disable_dcc(struct r600_texture *rtex)
{
	/* We can't disable DCC if it can be written by another process. */
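	/* I.e. DCC must be present, and the texture must either be unshared
	 * or shared without external write access (descriptive note). */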
	return rtex->dcc_offset &&
	       (!rtex->resource.is_shared ||
		!(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE));
}

static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
				     struct r600_texture *rtex)
{
	if (!r600_can_disable_dcc(rtex))
		return false;

	assert(rtex->dcc_separate_buffer == NULL);

	/* Disable DCC. */
	rtex->dcc_offset = 0;

	/* Notify all contexts about the change. */
	r600_dirty_all_framebuffer_states(rscreen);
	return true;
}

/**
 * Disable DCC for the texture. (first decompress, then discard metadata).
 *
 * There is an unresolved multi-context synchronization issue between
 * screen::aux_context and the current context. If applications do this with
 * multiple contexts, it's already undefined behavior for them and we don't
 * have to worry about that. The scenario is:
 *
 * If context 1 disables DCC and context 2 has queued commands that write
 * to the texture via CB with DCC enabled, and the order of operations is
 * as follows:
 *   context 2 queues draw calls rendering to the texture, but doesn't flush
 *   context 1 disables DCC and flushes
 *   context 1 & 2 reset descriptors and FB state
 *   context 2 flushes (new compressed tiles written by the draw calls)
 *   context 1 & 2 read garbage, because DCC is disabled, yet there are
 *     compressed tiles
 *
 * \param rctx  the current context if you have one, or rscreen->aux_context
 *              if you don't.
 */
bool r600_texture_disable_dcc(struct r600_common_context *rctx,
			      struct r600_texture *rtex)
{
	struct r600_common_screen *rscreen = rctx->screen;

	if (!r600_can_disable_dcc(rtex))
		return false;

	if (&rctx->b == rscreen->aux_context)
		pipe_mutex_lock(rscreen->aux_context_lock);

	/* Decompress DCC. */
	rctx->decompress_dcc(&rctx->b, rtex);
	rctx->b.flush(&rctx->b, NULL, 0);

	if (&rctx->b == rscreen->aux_context)
		pipe_mutex_unlock(rscreen->aux_context_lock);

	return r600_texture_discard_dcc(rscreen, rtex);
}

static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx,
					     struct r600_texture *rtex,
					     bool invalidate_storage)
{
	struct pipe_screen *screen = rctx->b.screen;
	struct r600_texture *new_tex;
	struct pipe_resource templ = rtex->resource.b.b;
	unsigned i;

	templ.bind |= PIPE_BIND_LINEAR;

	/* r600g doesn't react to dirty_tex_descriptor_counter */
	if (rctx->chip_class < SI)
		return;

	if (rtex->resource.is_shared ||
	    rtex->surface.is_linear)
		return;

	/* This fails with MSAA, depth, and compressed textures. */
	if (r600_choose_tiling(rctx->screen, &templ) !=
	    RADEON_SURF_MODE_LINEAR_ALIGNED)
		return;

	new_tex = (struct r600_texture*)screen->resource_create(screen, &templ);
	if (!new_tex)
		return;

	/* Copy the pixels to the new texture. */
	if (!invalidate_storage) {
		for (i = 0; i <= templ.last_level; i++) {
			struct pipe_box box;

			u_box_3d(0, 0, 0,
				 u_minify(templ.width0, i), u_minify(templ.height0, i),
				 util_max_layer(&templ, i) + 1, &box);

			rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0,
				       &rtex->resource.b.b, i, &box);
		}
	}

	r600_texture_discard_cmask(rctx->screen, rtex);
	r600_texture_discard_dcc(rctx->screen, rtex);

	/* Replace the structure fields of rtex. */
	rtex->resource.b.b.bind = templ.bind;
	pb_reference(&rtex->resource.buf, new_tex->resource.buf);
	rtex->resource.gpu_address = new_tex->resource.gpu_address;
	rtex->resource.vram_usage = new_tex->resource.vram_usage;
	rtex->resource.gart_usage = new_tex->resource.gart_usage;
	rtex->resource.bo_size = new_tex->resource.bo_size;
	rtex->resource.bo_alignment = new_tex->resource.bo_alignment;
	rtex->resource.domains = new_tex->resource.domains;
	rtex->resource.flags = new_tex->resource.flags;
	rtex->size = new_tex->size;
	rtex->surface = new_tex->surface;
	rtex->non_disp_tiling = new_tex->non_disp_tiling;
	rtex->cb_color_info = new_tex->cb_color_info;
	rtex->cmask = new_tex->cmask; /* needed even without CMASK */

	assert(!rtex->htile_buffer);
	assert(!rtex->cmask.size);
	assert(!rtex->fmask.size);
	assert(!rtex->dcc_offset);
	assert(!rtex->is_depth);

	r600_texture_reference(&new_tex, NULL);

	r600_dirty_all_framebuffer_states(rctx->screen);
	p_atomic_inc(&rctx->screen->dirty_tex_descriptor_counter);
}

static boolean r600_texture_get_handle(struct pipe_screen* screen,
				       struct pipe_context *ctx,
				       struct pipe_resource *resource,
				       struct winsys_handle *whandle,
				       unsigned usage)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
	struct r600_common_context *rctx = (struct r600_common_context*)
					   (ctx ? ctx : rscreen->aux_context);
	struct r600_resource *res = (struct r600_resource*)resource;
	struct r600_texture *rtex = (struct r600_texture*)resource;
	struct radeon_bo_metadata metadata;
	bool update_metadata = false;

	/* This is not supported now, but it might be required for OpenCL
	 * interop in the future.
	 */
	if (resource->target != PIPE_BUFFER &&
	    (resource->nr_samples > 1 || rtex->is_depth))
		return false;

	if (resource->target != PIPE_BUFFER) {
		/* Since shader image stores don't support DCC on VI,
		 * disable it for external clients that want write
		 * access.
		 */
		if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
			if (r600_texture_disable_dcc(rctx, rtex))
				update_metadata = true;
		}

		if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
		    (rtex->cmask.size || rtex->dcc_offset)) {
			/* Eliminate fast clear (both CMASK and DCC) */
			r600_eliminate_fast_color_clear(rctx, rtex);

			/* Disable CMASK if flush_resource isn't going
			 * to be called.
			 */
			if (rtex->cmask.size)
				r600_texture_discard_cmask(rscreen, rtex);
		}

		/* Set metadata. */
		if (!res->is_shared || update_metadata) {
			r600_texture_init_metadata(rtex, &metadata);
			if (rscreen->query_opaque_metadata)
				rscreen->query_opaque_metadata(rscreen, rtex,
							       &metadata);

			rscreen->ws->buffer_set_metadata(res->buf, &metadata);
		}
	}

	if (res->is_shared) {
		/* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
		 * doesn't set it.
		 */
		res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
		if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
			res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
	} else {
		res->is_shared = true;
		res->external_usage = usage;
	}

	return rscreen->ws->buffer_get_handle(res->buf,
					      rtex->surface.level[0].nblk_x *
					      rtex->surface.bpe,
					      rtex->surface.level[0].offset,
					      rtex->surface.level[0].slice_size,
					      whandle);
}

static void r600_texture_destroy(struct pipe_screen *screen,
				 struct pipe_resource *ptex)
{
	struct r600_texture *rtex = (struct r600_texture*)ptex;
	struct r600_resource *resource = &rtex->resource;

	r600_texture_reference(&rtex->flushed_depth_texture, NULL);

	r600_resource_reference(&rtex->htile_buffer, NULL);
	if (rtex->cmask_buffer != &rtex->resource) {
		r600_resource_reference(&rtex->cmask_buffer, NULL);
	}
	pb_reference(&resource->buf, NULL);
	r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
	r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
	FREE(rtex);
}

static const struct u_resource_vtbl r600_texture_vtbl;

/* The number of samples can be specified independently of the texture. */
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
				 struct r600_texture *rtex,
				 unsigned nr_samples,
				 struct r600_fmask_info *out)
{
	/* FMASK is allocated like an ordinary texture. */
	struct pipe_resource templ = rtex->resource.b.b;
	struct radeon_surf fmask = {};
	unsigned flags, bpe;

	memset(out, 0, sizeof(*out));

	templ.nr_samples = 1;
	flags = rtex->surface.flags | RADEON_SURF_FMASK;

	if (rscreen->chip_class <= CAYMAN) {
		/* Use the same parameters and tile mode. */
		fmask.bankw = rtex->surface.bankw;
		fmask.bankh = rtex->surface.bankh;
		fmask.mtilea = rtex->surface.mtilea;
		fmask.tile_split = rtex->surface.tile_split;

		if (nr_samples <= 4)
			fmask.bankh = 4;
	}

	switch (nr_samples) {
	case 2:
	case 4:
		bpe = 1;
		break;
	case 8:
		bpe = 4;
		break;
	default:
		R600_ERR("Invalid sample count for FMASK allocation.\n");
		return;
	}

	/* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
	 * This can be fixed by writing a separate FMASK allocator specifically
	 * for R600-R700 asics. */
	if (rscreen->chip_class <= R700) {
		bpe *= 2;
	}

	if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe,
				      RADEON_SURF_MODE_2D, &fmask)) {
		R600_ERR("Got error in surface_init while allocating FMASK.\n");
		return;
	}

	assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);

	out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
	if (out->slice_tile_max)
		out->slice_tile_max -= 1;

	out->tile_mode_index = fmask.tiling_index[0];
	out->pitch_in_pixels = fmask.level[0].nblk_x;
	out->bank_height = fmask.bankh;
	out->alignment = MAX2(256, fmask.surf_alignment);
	out->size = fmask.surf_size;
}

static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
					struct r600_texture *rtex)
{
	r600_texture_get_fmask_info(rscreen, rtex,
				    rtex->resource.b.b.nr_samples, &rtex->fmask);

	rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
	rtex->size = rtex->fmask.offset + rtex->fmask.size;
}

void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
				 struct r600_texture *rtex,
				 struct r600_cmask_info *out)
{
	unsigned cmask_tile_width = 8;
	unsigned cmask_tile_height = 8;
	unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
	unsigned element_bits = 4;
	unsigned cmask_cache_bits = 1024;
	unsigned num_pipes = rscreen->info.num_tile_pipes;
	unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;

	unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
	unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
	unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
	unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
	unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;

	unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width);
	unsigned height = align(rtex->resource.b.b.height0, macro_tile_height);

	unsigned base_align = num_pipes * pipe_interleave_bytes;
	unsigned slice_bytes =
		((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;

	assert(macro_tile_width % 128 == 0);
	assert(macro_tile_height % 128 == 0);

	out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
	out->alignment = MAX2(256, base_align);
	out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
		    align(slice_bytes, base_align);
}

static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
				      struct r600_texture *rtex,
				      struct r600_cmask_info *out)
{
	unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
	unsigned num_pipes = rscreen->info.num_tile_pipes;
	unsigned cl_width, cl_height;

	switch (num_pipes) {
	case 2:
		cl_width = 32;
		cl_height = 16;
		break;
	case 4:
		cl_width = 32;
		cl_height = 32;
		break;
	case 8:
		cl_width = 64;
		cl_height = 32;
		break;
	case 16: /* Hawaii */
		cl_width = 64;
		cl_height = 64;
		break;
	default:
		assert(0);
		return;
	}

	unsigned base_align = num_pipes * pipe_interleave_bytes;

	unsigned width = align(rtex->resource.b.b.width0, cl_width*8);
	unsigned height = align(rtex->resource.b.b.height0, cl_height*8);
	unsigned slice_elements = (width * height) / (8*8);

	/* Each element of CMASK is a nibble. */
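	/* One nibble covers one 8x8-pixel block, so a slice needs
	 * (width/8) * (height/8) / 2 bytes before alignment (descriptive note). */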
	unsigned slice_bytes = slice_elements / 2;

	out->slice_tile_max = (width * height) / (128*128);
	if (out->slice_tile_max)
		out->slice_tile_max -= 1;

	out->alignment = MAX2(256, base_align);
	out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
		    align(slice_bytes, base_align);
}

static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
					struct r600_texture *rtex)
{
	if (rscreen->chip_class >= SI) {
		si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
	} else {
		r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
	}

	rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
	rtex->size = rtex->cmask.offset + rtex->cmask.size;

	if (rscreen->chip_class >= SI)
		rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
	else
		rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
}

static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
					      struct r600_texture *rtex)
{
	if (rtex->cmask_buffer)
		return;

	assert(rtex->cmask.size == 0);

	if (rscreen->chip_class >= SI) {
		si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
	} else {
		r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
	}

	rtex->cmask_buffer = (struct r600_resource *)
		r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
					   rtex->cmask.size,
					   rtex->cmask.alignment);
	if (rtex->cmask_buffer == NULL) {
		rtex->cmask.size = 0;
		return;
	}

	/* update colorbuffer state bits */
	rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;

	if (rscreen->chip_class >= SI)
		rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
	else
		rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);

	p_atomic_inc(&rscreen->compressed_colortex_counter);
}

static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
					struct r600_texture *rtex)
{
	unsigned cl_width, cl_height, width, height;
	unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
	unsigned num_pipes = rscreen->info.num_tile_pipes;

	rtex->surface.htile_size = 0;

	if (rscreen->chip_class <= EVERGREEN &&
	    rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
		return;

	/* HW bug on R6xx. */
	if (rscreen->chip_class == R600 &&
	    (rtex->resource.b.b.width0 > 7680 ||
	     rtex->resource.b.b.height0 > 7680))
		return;

	/* HTILE is broken with 1D tiling on old kernels and CIK. */
	if (rscreen->chip_class >= CIK &&
	    rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
	    rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
		return;

	/* Overalign HTILE on P2 configs to work around GPU hangs in
	 * piglit/depthstencil-render-miplevels 585.
	 *
	 * This has been confirmed to help Kabini & Stoney, where the hangs
	 * are always reproducible. I think I have seen the test hang
	 * on Carrizo too, though it was very rare there. */
	if (rscreen->chip_class >= CIK && num_pipes < 4)
		num_pipes = 4;

	switch (num_pipes) {
	case 1:
		cl_width = 32;
		cl_height = 16;
		break;
	case 2:
		cl_width = 32;
		cl_height = 32;
		break;
	case 4:
		cl_width = 64;
		cl_height = 32;
		break;
	case 8:
		cl_width = 64;
		cl_height = 64;
		break;
	case 16:
		cl_width = 128;
		cl_height = 64;
		break;
	default:
		assert(0);
		return;
	}

	width = align(rtex->resource.b.b.width0, cl_width * 8);
	height = align(rtex->resource.b.b.height0, cl_height * 8);

	slice_elements = (width * height) / (8 * 8);
	slice_bytes = slice_elements * 4;

	pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
	base_align = num_pipes * pipe_interleave_bytes;

	rtex->surface.htile_alignment = base_align;
	rtex->surface.htile_size =
		(util_max_layer(&rtex->resource.b.b, 0) + 1) *
		align(slice_bytes, base_align);
}

static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
					struct r600_texture *rtex)
{
	uint32_t clear_value;

	if (rtex->tc_compatible_htile) {
		clear_value = 0x0000030F;
	} else {
		r600_texture_get_htile_size(rscreen, rtex);
		clear_value = 0;
	}

	if (!rtex->surface.htile_size)
		return;

	rtex->htile_buffer = (struct r600_resource*)
		r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
					   rtex->surface.htile_size,
					   rtex->surface.htile_alignment);
	if (rtex->htile_buffer == NULL) {
		/* this is not a fatal error as we can still keep rendering
		 * without htile buffer */
		R600_ERR("Failed to create buffer object for htile buffer.\n");
	} else {
		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
					 0, rtex->surface.htile_size,
					 clear_value);
	}
}

void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
{
	int i;

	fprintf(f, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
		"blk_h=%u, array_size=%u, last_level=%u, "
		"bpe=%u, nsamples=%u, flags=0x%x, %s\n",
		rtex->resource.b.b.width0, rtex->resource.b.b.height0,
		rtex->resource.b.b.depth0, rtex->surface.blk_w,
		rtex->surface.blk_h,
		rtex->resource.b.b.array_size, rtex->resource.b.b.last_level,
		rtex->surface.bpe, rtex->resource.b.b.nr_samples,
		rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));

	fprintf(f, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
		"bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
		rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.bankw,
		rtex->surface.bankh, rtex->surface.num_banks, rtex->surface.mtilea,
		rtex->surface.tile_split, rtex->surface.pipe_config,
		(rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);

	if (rtex->fmask.size)
		fprintf(f, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
			"bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
			rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
			rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
			rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);

	if (rtex->cmask.size)
		fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
			"slice_tile_max=%u\n",
			rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
			rtex->cmask.slice_tile_max);

	if (rtex->htile_buffer)
		fprintf(f, " HTile: size=%u, alignment=%u, TC_compatible = %u\n",
= %u\n", 933 rtex->htile_buffer->b.b.width0, 934 rtex->htile_buffer->buf->alignment, 935 rtex->tc_compatible_htile); 936 937 if (rtex->dcc_offset) { 938 fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n", 939 rtex->dcc_offset, rtex->surface.dcc_size, 940 rtex->surface.dcc_alignment); 941 for (i = 0; i <= rtex->resource.b.b.last_level; i++) 942 fprintf(f, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", " 943 "fast_clear_size=%"PRIu64"\n", 944 i, i < rtex->surface.num_dcc_levels, 945 rtex->surface.level[i].dcc_offset, 946 rtex->surface.level[i].dcc_fast_clear_size); 947 } 948 949 for (i = 0; i <= rtex->resource.b.b.last_level; i++) 950 fprintf(f, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", " 951 "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " 952 "mode=%u, tiling_index = %u\n", 953 i, rtex->surface.level[i].offset, 954 rtex->surface.level[i].slice_size, 955 u_minify(rtex->resource.b.b.width0, i), 956 u_minify(rtex->resource.b.b.height0, i), 957 u_minify(rtex->resource.b.b.depth0, i), 958 rtex->surface.level[i].nblk_x, 959 rtex->surface.level[i].nblk_y, 960 rtex->surface.level[i].mode, 961 rtex->surface.tiling_index[i]); 962 963 if (rtex->surface.flags & RADEON_SURF_SBUFFER) { 964 fprintf(f, " StencilLayout: tilesplit=%u\n", 965 rtex->surface.stencil_tile_split); 966 for (i = 0; i <= rtex->resource.b.b.last_level; i++) { 967 fprintf(f, " StencilLevel[%i]: offset=%"PRIu64", " 968 "slice_size=%"PRIu64", npix_x=%u, " 969 "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " 970 "mode=%u, tiling_index = %u\n", 971 i, rtex->surface.stencil_level[i].offset, 972 rtex->surface.stencil_level[i].slice_size, 973 u_minify(rtex->resource.b.b.width0, i), 974 u_minify(rtex->resource.b.b.height0, i), 975 u_minify(rtex->resource.b.b.depth0, i), 976 rtex->surface.stencil_level[i].nblk_x, 977 rtex->surface.stencil_level[i].nblk_y, 978 rtex->surface.stencil_level[i].mode, 979 rtex->surface.stencil_tiling_index[i]); 980 } 981 } 982 } 983 984 /* Common processing for r600_texture_create and r600_texture_from_handle */ 985 static struct r600_texture * 986 r600_texture_create_object(struct pipe_screen *screen, 987 const struct pipe_resource *base, 988 struct pb_buffer *buf, 989 struct radeon_surf *surface) 990 { 991 struct r600_texture *rtex; 992 struct r600_resource *resource; 993 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 994 995 rtex = CALLOC_STRUCT(r600_texture); 996 if (!rtex) 997 return NULL; 998 999 resource = &rtex->resource; 1000 resource->b.b = *base; 1001 resource->b.b.next = NULL; 1002 resource->b.vtbl = &r600_texture_vtbl; 1003 pipe_reference_init(&resource->b.b.reference, 1); 1004 resource->b.b.screen = screen; 1005 1006 /* don't include stencil-only formats which we don't support for rendering */ 1007 rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format)); 1008 1009 rtex->surface = *surface; 1010 rtex->size = rtex->surface.surf_size; 1011 1012 rtex->tc_compatible_htile = rtex->surface.htile_size != 0 && 1013 (rtex->surface.flags & 1014 RADEON_SURF_TC_COMPATIBLE_HTILE); 1015 1016 /* TC-compatible HTILE only supports Z32_FLOAT. */ 1017 if (rtex->tc_compatible_htile) 1018 rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT; 1019 else 1020 rtex->db_render_format = base->format; 1021 1022 /* Tiled depth textures utilize the non-displayable tile order. 1023 * This must be done after r600_setup_surface. 1024 * Applies to R600-Cayman. 
	rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
	/* Applies to GCN. */
	rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;

	/* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
	 * between frames, so the only thing that can enable separate DCC
	 * with DRI2 is multiple slow clears within a frame.
	 */
	rtex->ps_draw_ratio = 0;

	if (rtex->is_depth) {
		if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
				   R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
		    rscreen->chip_class >= EVERGREEN) {
			rtex->can_sample_z = !rtex->surface.depth_adjusted;
			rtex->can_sample_s = !rtex->surface.stencil_adjusted;
		} else {
			if (rtex->resource.b.b.nr_samples <= 1 &&
			    (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
			     rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT))
				rtex->can_sample_z = true;
		}

		if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
				     R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
			rtex->db_compatible = true;

			if (!(rscreen->debug_flags & DBG_NO_HYPERZ))
				r600_texture_allocate_htile(rscreen, rtex);
		}
	} else {
		if (base->nr_samples > 1) {
			if (!buf) {
				r600_texture_allocate_fmask(rscreen, rtex);
				r600_texture_allocate_cmask(rscreen, rtex);
				rtex->cmask_buffer = &rtex->resource;
			}
			if (!rtex->fmask.size || !rtex->cmask.size) {
				FREE(rtex);
				return NULL;
			}
		}

		/* Shared textures must always set up DCC here.
		 * If it's not present, it will be disabled by
		 * apply_opaque_metadata later.
		 */
		if (rtex->surface.dcc_size &&
		    (buf || !(rscreen->debug_flags & DBG_NO_DCC)) &&
		    !(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
			/* Reserve space for the DCC buffer. */
			rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
			rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
		}
	}

	/* Now create the backing buffer. */
	if (!buf) {
		r600_init_resource_fields(rscreen, resource, rtex->size,
					  rtex->surface.surf_alignment);

		resource->flags |= RADEON_FLAG_HANDLE;

		if (!r600_alloc_resource(rscreen, resource)) {
			FREE(rtex);
			return NULL;
		}
	} else {
		resource->buf = buf;
		resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
		resource->bo_size = buf->size;
		resource->bo_alignment = buf->alignment;
		resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
		if (resource->domains & RADEON_DOMAIN_VRAM)
			resource->vram_usage = buf->size;
		else if (resource->domains & RADEON_DOMAIN_GTT)
			resource->gart_usage = buf->size;
	}

	if (rtex->cmask.size) {
		/* Initialize the cmask to 0xCC (= compressed state). */
		r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
					 rtex->cmask.offset, rtex->cmask.size,
					 0xCCCCCCCC);
	}

	/* Initialize DCC only if the texture is not being imported. */
	if (!buf && rtex->dcc_offset) {
		r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
					 rtex->dcc_offset,
					 rtex->surface.dcc_size,
					 0xFFFFFFFF);
	}

	/* Initialize the CMASK base register value. */
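	/* The register stores the byte address shifted right by 8, i.e. a
	 * 256-byte-aligned base (descriptive note). */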
	rtex->cmask.base_address_reg =
		(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;

	if (rscreen->debug_flags & DBG_VM) {
		fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
			rtex->resource.gpu_address,
			rtex->resource.gpu_address + rtex->resource.buf->size,
			base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
			base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
	}

	if (rscreen->debug_flags & DBG_TEX) {
		puts("Texture:");
		r600_print_texture_info(rtex, stdout);
		fflush(stdout);
	}

	return rtex;
}

static enum radeon_surf_mode
r600_choose_tiling(struct r600_common_screen *rscreen,
		   const struct pipe_resource *templ)
{
	const struct util_format_description *desc = util_format_description(templ->format);
	bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;

	/* MSAA resources must be 2D tiled. */
	if (templ->nr_samples > 1)
		return RADEON_SURF_MODE_2D;

	/* Transfer resources should be linear. */
	if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
		return RADEON_SURF_MODE_LINEAR_ALIGNED;

	/* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
	if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
	    (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
	    (templ->target == PIPE_TEXTURE_2D ||
	     templ->target == PIPE_TEXTURE_3D))
		force_tiling = true;

	/* Handle common candidates for the linear mode.
	 * Compressed textures and DB surfaces must always be tiled.
	 */
	if (!force_tiling && !util_format_is_compressed(templ->format) &&
	    (!util_format_is_depth_or_stencil(templ->format) ||
	     templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)) {
		if (rscreen->debug_flags & DBG_NO_TILING)
			return RADEON_SURF_MODE_LINEAR_ALIGNED;

		/* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
		if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
			return RADEON_SURF_MODE_LINEAR_ALIGNED;

		/* Cursors are linear on SI.
		 * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
		if (rscreen->chip_class >= SI &&
		    (templ->bind & PIPE_BIND_CURSOR))
			return RADEON_SURF_MODE_LINEAR_ALIGNED;

		if (templ->bind & PIPE_BIND_LINEAR)
			return RADEON_SURF_MODE_LINEAR_ALIGNED;

		/* Textures with a very small height are recommended to be linear. */
		if (templ->target == PIPE_TEXTURE_1D ||
		    templ->target == PIPE_TEXTURE_1D_ARRAY ||
		    templ->height0 <= 4)
			return RADEON_SURF_MODE_LINEAR_ALIGNED;

		/* Textures likely to be mapped often. */
		if (templ->usage == PIPE_USAGE_STAGING ||
		    templ->usage == PIPE_USAGE_STREAM)
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	/* Make small textures 1D tiled. */
	if (templ->width0 <= 16 || templ->height0 <= 16 ||
	    (rscreen->debug_flags & DBG_NO_2D_TILING))
		return RADEON_SURF_MODE_1D;

	/* The allocator will switch to 1D if needed. */
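	/* Everything that falls through, e.g. a typical mipmapped RGBA8
	 * render target without special bind flags, ends up 2D tiled
	 * (descriptive note). */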
	return RADEON_SURF_MODE_2D;
}

struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
					  const struct pipe_resource *templ)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
	struct radeon_surf surface = {0};
	bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
	bool tc_compatible_htile =
		rscreen->chip_class >= VI &&
		(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
		!(rscreen->debug_flags & DBG_NO_HYPERZ) &&
		!is_flushed_depth &&
		templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
		util_format_is_depth_or_stencil(templ->format);

	int r;

	r = r600_init_surface(rscreen, &surface, templ,
			      r600_choose_tiling(rscreen, templ), 0, 0,
			      false, false, is_flushed_depth,
			      tc_compatible_htile);
	if (r) {
		return NULL;
	}

	return (struct pipe_resource *)
	       r600_texture_create_object(screen, templ, NULL, &surface);
}

static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
						      const struct pipe_resource *templ,
						      struct winsys_handle *whandle,
						      unsigned usage)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
	struct pb_buffer *buf = NULL;
	unsigned stride = 0, offset = 0;
	unsigned array_mode;
	struct radeon_surf surface;
	int r;
	struct radeon_bo_metadata metadata = {};
	struct r600_texture *rtex;

	/* Support only 2D textures without mipmaps */
	if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
	    templ->depth0 != 1 || templ->last_level != 0)
		return NULL;

	buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset);
	if (!buf)
		return NULL;

	rscreen->ws->buffer_get_metadata(buf, &metadata);

	surface.pipe_config = metadata.pipe_config;
	surface.bankw = metadata.bankw;
	surface.bankh = metadata.bankh;
	surface.tile_split = metadata.tile_split;
	surface.mtilea = metadata.mtilea;
	surface.num_banks = metadata.num_banks;

	if (metadata.macrotile == RADEON_LAYOUT_TILED)
		array_mode = RADEON_SURF_MODE_2D;
	else if (metadata.microtile == RADEON_LAYOUT_TILED)
		array_mode = RADEON_SURF_MODE_1D;
	else
		array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;

	r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
			      offset, true, metadata.scanout, false, false);
	if (r) {
		return NULL;
	}

	rtex = r600_texture_create_object(screen, templ, buf, &surface);
	if (!rtex)
		return NULL;

	rtex->resource.is_shared = true;
	rtex->resource.external_usage = usage;

	if (rscreen->apply_opaque_metadata)
		rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);

	return &rtex->resource.b.b;
}

bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
				     struct pipe_resource *texture,
				     struct r600_texture **staging)
{
	struct r600_texture *rtex = (struct r600_texture*)texture;
	struct pipe_resource resource;
	struct r600_texture **flushed_depth_texture = staging ?
			staging : &rtex->flushed_depth_texture;
	enum pipe_format pipe_format = texture->format;

	if (!staging) {
		if (rtex->flushed_depth_texture)
			return true; /* it's ready */

		if (!rtex->can_sample_z && rtex->can_sample_s) {
			switch (pipe_format) {
			case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
				/* Save memory by not allocating the S plane. */
				pipe_format = PIPE_FORMAT_Z32_FLOAT;
				break;
			case PIPE_FORMAT_Z24_UNORM_S8_UINT:
			case PIPE_FORMAT_S8_UINT_Z24_UNORM:
				/* Save memory bandwidth by not copying the
				 * stencil part during flush.
				 *
				 * This potentially increases memory bandwidth
				 * if an application uses both Z and S texturing
				 * simultaneously (a flushed Z24S8 texture
				 * would be stored compactly), but how often
				 * does that really happen?
				 */
				pipe_format = PIPE_FORMAT_Z24X8_UNORM;
				break;
			default:;
			}
		} else if (!rtex->can_sample_s && rtex->can_sample_z) {
			assert(util_format_has_stencil(util_format_description(pipe_format)));

			/* DB->CB copies to an 8bpp surface don't work. */
			pipe_format = PIPE_FORMAT_X24S8_UINT;
		}
	}

	memset(&resource, 0, sizeof(resource));
	resource.target = texture->target;
	resource.format = pipe_format;
	resource.width0 = texture->width0;
	resource.height0 = texture->height0;
	resource.depth0 = texture->depth0;
	resource.array_size = texture->array_size;
	resource.last_level = texture->last_level;
	resource.nr_samples = texture->nr_samples;
	resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
	resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
	resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;

	if (staging)
		resource.flags |= R600_RESOURCE_FLAG_TRANSFER;

	*flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
	if (*flushed_depth_texture == NULL) {
		R600_ERR("failed to create temporary texture to hold flushed depth\n");
		return false;
	}

	(*flushed_depth_texture)->non_disp_tiling = false;
	return true;
}

/**
 * Initialize the pipe_resource descriptor to be of the same size as the box,
 * which is supposed to hold a subregion of the texture "orig" at the given
 * mipmap level.
 */
static void r600_init_temp_resource_from_box(struct pipe_resource *res,
					     struct pipe_resource *orig,
					     const struct pipe_box *box,
					     unsigned level, unsigned flags)
{
	memset(res, 0, sizeof(*res));
	res->format = orig->format;
	res->width0 = box->width;
	res->height0 = box->height;
	res->depth0 = 1;
	res->array_size = 1;
	res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
	res->flags = flags;

	/* We must set the correct texture target and dimensions for a 3D box. */
	if (box->depth > 1 && util_max_layer(orig, level) > 0) {
		res->target = PIPE_TEXTURE_2D_ARRAY;
		res->array_size = box->depth;
	} else {
		res->target = PIPE_TEXTURE_2D;
	}
}

static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
					struct r600_texture *rtex,
					unsigned transfer_usage,
					const struct pipe_box *box)
{
	/* r600g doesn't react to dirty_tex_descriptor_counter */
	return rscreen->chip_class >= SI &&
		!rtex->resource.is_shared &&
		!(transfer_usage & PIPE_TRANSFER_READ) &&
		rtex->resource.b.b.last_level == 0 &&
		util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
						 box->x, box->y, box->z,
						 box->width, box->height,
						 box->depth);
}

static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
					    struct r600_texture *rtex)
{
	struct r600_common_screen *rscreen = rctx->screen;

	/* There is no point in discarding depth and tiled buffers. */
	assert(!rtex->is_depth);
	assert(rtex->surface.is_linear);

	/* Reallocate the buffer in the same pipe_resource. */
	r600_alloc_resource(rscreen, &rtex->resource);

	/* Initialize the CMASK base address (needed even without CMASK). */
	rtex->cmask.base_address_reg =
		(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;

	r600_dirty_all_framebuffer_states(rscreen);
	p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);

	rctx->num_alloc_tex_transfer_bytes += rtex->size;
}

static void *r600_texture_transfer_map(struct pipe_context *ctx,
				       struct pipe_resource *texture,
				       unsigned level,
				       unsigned usage,
				       const struct pipe_box *box,
				       struct pipe_transfer **ptransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_texture *rtex = (struct r600_texture*)texture;
	struct r600_transfer *trans;
	struct r600_resource *buf;
	unsigned offset = 0;
	char *map;
	bool use_staging_texture = false;

	assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));

	/* Depth textures use staging unconditionally. */
	if (!rtex->is_depth) {
		/* Degrade the tile mode if we get too many transfers on APUs.
		 * On dGPUs, the staging texture is always faster.
		 * Only count uploads that are at least 4x4 pixels large.
		 */
		if (!rctx->screen->info.has_dedicated_vram &&
		    level == 0 &&
		    box->width >= 4 && box->height >= 4 &&
		    p_atomic_inc_return(&rtex->num_level0_transfers) == 10) {
			bool can_invalidate =
				r600_can_invalidate_texture(rctx->screen, rtex,
							    usage, box);

			r600_degrade_tile_mode_to_linear(rctx, rtex,
							 can_invalidate);
		}

		/* Tiled textures need to be converted into a linear texture for CPU
		 * access. The staging texture is always linear and is placed in GART.
		 *
		 * Reading from VRAM is slow, always use the staging texture in
		 * this case.
		 *
		 * Use the staging texture for uploads if the underlying BO
		 * is busy.
		 */
		if (!rtex->surface.is_linear)
			use_staging_texture = true;
		else if (usage & PIPE_TRANSFER_READ)
			use_staging_texture = (rtex->resource.domains &
					       RADEON_DOMAIN_VRAM) != 0;
		/* Write & linear only: */
		else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
							 RADEON_USAGE_READWRITE) ||
			 !rctx->ws->buffer_wait(rtex->resource.buf, 0,
						RADEON_USAGE_READWRITE)) {
			/* It's busy. */
			if (r600_can_invalidate_texture(rctx->screen, rtex,
							usage, box))
				r600_texture_invalidate_storage(rctx, rtex);
			else
				use_staging_texture = true;
		}
	}

	trans = CALLOC_STRUCT(r600_transfer);
	if (!trans)
		return NULL;
	trans->transfer.resource = texture;
	trans->transfer.level = level;
	trans->transfer.usage = usage;
	trans->transfer.box = *box;

	if (rtex->is_depth) {
		struct r600_texture *staging_depth;

		if (rtex->resource.b.b.nr_samples > 1) {
			/* MSAA depth buffers need to be converted to single sample buffers.
			 *
			 * Mapping MSAA depth buffers can occur if ReadPixels is called
			 * with a multisample GLX visual.
			 *
			 * First downsample the depth buffer to a temporary texture,
			 * then decompress the temporary one to staging.
			 *
			 * Only the region being mapped is transferred.
			 */
			struct pipe_resource resource;

			r600_init_temp_resource_from_box(&resource, texture, box, level, 0);

			if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
				R600_ERR("failed to create temporary texture to hold untiled copy\n");
				FREE(trans);
				return NULL;
			}

			if (usage & PIPE_TRANSFER_READ) {
				struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
				if (!temp) {
					R600_ERR("failed to create a temporary depth texture\n");
					FREE(trans);
					return NULL;
				}

				r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
				rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
							    0, 0, 0, box->depth, 0, 0);
				pipe_resource_reference(&temp, NULL);
			}
		}
		else {
			/* XXX: only readback the rectangle which is being mapped? */
			/* XXX: when discard is true, no need to read back from depth texture */
			if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
				R600_ERR("failed to create temporary texture to hold untiled copy\n");
				FREE(trans);
				return NULL;
			}

			rctx->blit_decompress_depth(ctx, rtex, staging_depth,
						    level, level,
						    box->z, box->z + box->depth - 1,
						    0, 0);

			offset = r600_texture_get_offset(staging_depth, level, box);
		}

		trans->transfer.stride = staging_depth->surface.level[level].nblk_x *
					 staging_depth->surface.bpe;
		trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
		trans->staging = (struct r600_resource*)staging_depth;
		buf = trans->staging;
	} else if (use_staging_texture) {
		struct pipe_resource resource;
		struct r600_texture *staging;

		r600_init_temp_resource_from_box(&resource, texture, box, level,
						 R600_RESOURCE_FLAG_TRANSFER);
		resource.usage = (usage & PIPE_TRANSFER_READ) ?
			PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;

		/* Create the temporary texture. */
		staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
		if (!staging) {
			R600_ERR("failed to create temporary texture to hold untiled copy\n");
			FREE(trans);
			return NULL;
		}
		trans->staging = &staging->resource;
		trans->transfer.stride = staging->surface.level[0].nblk_x *
					 staging->surface.bpe;
		trans->transfer.layer_stride = staging->surface.level[0].slice_size;

		if (usage & PIPE_TRANSFER_READ)
			r600_copy_to_staging_texture(ctx, trans);
		else
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

		buf = trans->staging;
	} else {
		/* the resource is mapped directly */
		trans->transfer.stride = rtex->surface.level[level].nblk_x *
					 rtex->surface.bpe;
		trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
		offset = r600_texture_get_offset(rtex, level, box);
		buf = &rtex->resource;
	}

	if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
		r600_resource_reference(&trans->staging, NULL);
		FREE(trans);
		return NULL;
	}

	*ptransfer = &trans->transfer;
	return map + offset;
}

static void r600_texture_transfer_unmap(struct pipe_context *ctx,
					struct pipe_transfer* transfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
	struct pipe_resource *texture = transfer->resource;
	struct r600_texture *rtex = (struct r600_texture*)texture;

	if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
		if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
			ctx->resource_copy_region(ctx, texture, transfer->level,
						  transfer->box.x, transfer->box.y, transfer->box.z,
						  &rtransfer->staging->b.b, transfer->level,
						  &transfer->box);
		} else {
			r600_copy_from_staging_texture(ctx, rtransfer);
		}
	}

	if (rtransfer->staging) {
		rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
		r600_resource_reference(&rtransfer->staging, NULL);
	}

	/* Heuristic for {upload, draw, upload, draw, ..}:
	 *
	 * Flush the gfx IB if we've allocated too much texture storage.
	 *
	 * The idea is that we don't want to build IBs that use too much
	 * memory and put pressure on the kernel memory manager and we also
	 * want to make temporary and invalidated buffers go idle ASAP to
	 * decrease the total memory usage or make them reusable. The memory
	 * usage will be slightly higher than given here because of the buffer
	 * cache in the winsys.
	 *
	 * The result is that the kernel memory manager is never a bottleneck.
	 */
	if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) {
		rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
		rctx->num_alloc_tex_transfer_bytes = 0;
	}

	FREE(transfer);
}

static const struct u_resource_vtbl r600_texture_vtbl =
{
	NULL,				/* get_handle */
	r600_texture_destroy,		/* resource_destroy */
	r600_texture_transfer_map,	/* transfer_map */
	u_default_transfer_flush_region, /* transfer_flush_region */
	r600_texture_transfer_unmap,	/* transfer_unmap */
};

/* DCC channel type categories within which formats can be reinterpreted
 * while keeping the same DCC encoding. The swizzle must also match. */
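/* For example, R8G8B8A8_UNORM and R8G8B8A8_SRGB both map to dcc_channel_uint8
 * with identical swizzles and so stay compatible, while a view with a
 * different channel count such as R32_FLOAT does not (descriptive note). */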
*/ 1659 enum dcc_channel_type { 1660 dcc_channel_float32, 1661 dcc_channel_uint32, 1662 dcc_channel_sint32, 1663 dcc_channel_float16, 1664 dcc_channel_uint16, 1665 dcc_channel_sint16, 1666 dcc_channel_uint_10_10_10_2, 1667 dcc_channel_uint8, 1668 dcc_channel_sint8, 1669 dcc_channel_incompatible, 1670 }; 1671 1672 /* Return the type of DCC encoding. */ 1673 static enum dcc_channel_type 1674 vi_get_dcc_channel_type(const struct util_format_description *desc) 1675 { 1676 int i; 1677 1678 /* Find the first non-void channel. */ 1679 for (i = 0; i < desc->nr_channels; i++) 1680 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) 1681 break; 1682 if (i == desc->nr_channels) 1683 return dcc_channel_incompatible; 1684 1685 switch (desc->channel[i].size) { 1686 case 32: 1687 if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) 1688 return dcc_channel_float32; 1689 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) 1690 return dcc_channel_uint32; 1691 return dcc_channel_sint32; 1692 case 16: 1693 if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) 1694 return dcc_channel_float16; 1695 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) 1696 return dcc_channel_uint16; 1697 return dcc_channel_sint16; 1698 case 10: 1699 return dcc_channel_uint_10_10_10_2; 1700 case 8: 1701 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) 1702 return dcc_channel_uint8; 1703 return dcc_channel_sint8; 1704 default: 1705 return dcc_channel_incompatible; 1706 } 1707 } 1708 1709 /* Return if it's allowed to reinterpret one format as another with DCC enabled. */ 1710 bool vi_dcc_formats_compatible(enum pipe_format format1, 1711 enum pipe_format format2) 1712 { 1713 const struct util_format_description *desc1, *desc2; 1714 enum dcc_channel_type type1, type2; 1715 int i; 1716 1717 if (format1 == format2) 1718 return true; 1719 1720 desc1 = util_format_description(format1); 1721 desc2 = util_format_description(format2); 1722 1723 if (desc1->nr_channels != desc2->nr_channels) 1724 return false; 1725 1726 /* Swizzles must be the same. 
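 * Constant swizzles (0, 1, NONE) are skipped by the loop below; only
 * channels that actually select stored data are compared.  E.g.
 * R8G8B8A8_UNORM vs. B8G8R8A8_UNORM swap the red and blue selectors, so
 * they are rejected here even though both use 8-bit unsigned channels.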
*/ 1727 for (i = 0; i < desc1->nr_channels; i++) 1728 if (desc1->swizzle[i] <= PIPE_SWIZZLE_W && 1729 desc2->swizzle[i] <= PIPE_SWIZZLE_W && 1730 desc1->swizzle[i] != desc2->swizzle[i]) 1731 return false; 1732 1733 type1 = vi_get_dcc_channel_type(desc1); 1734 type2 = vi_get_dcc_channel_type(desc2); 1735 1736 return type1 != dcc_channel_incompatible && 1737 type2 != dcc_channel_incompatible && 1738 type1 == type2; 1739 } 1740 1741 void vi_dcc_disable_if_incompatible_format(struct r600_common_context *rctx, 1742 struct pipe_resource *tex, 1743 unsigned level, 1744 enum pipe_format view_format) 1745 { 1746 struct r600_texture *rtex = (struct r600_texture *)tex; 1747 1748 if (rtex->dcc_offset && 1749 level < rtex->surface.num_dcc_levels && 1750 !vi_dcc_formats_compatible(tex->format, view_format)) 1751 if (!r600_texture_disable_dcc(rctx, (struct r600_texture*)tex)) 1752 rctx->decompress_dcc(&rctx->b, rtex); 1753 } 1754 1755 struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, 1756 struct pipe_resource *texture, 1757 const struct pipe_surface *templ, 1758 unsigned width, unsigned height) 1759 { 1760 struct r600_common_context *rctx = (struct r600_common_context*)pipe; 1761 struct r600_surface *surface = CALLOC_STRUCT(r600_surface); 1762 1763 if (!surface) 1764 return NULL; 1765 1766 assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level)); 1767 assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level)); 1768 1769 pipe_reference_init(&surface->base.reference, 1); 1770 pipe_resource_reference(&surface->base.texture, texture); 1771 surface->base.context = pipe; 1772 surface->base.format = templ->format; 1773 surface->base.width = width; 1774 surface->base.height = height; 1775 surface->base.u = templ->u; 1776 1777 if (texture->target != PIPE_BUFFER) 1778 vi_dcc_disable_if_incompatible_format(rctx, texture, 1779 templ->u.tex.level, 1780 templ->format); 1781 1782 return &surface->base; 1783 } 1784 1785 static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, 1786 struct pipe_resource *tex, 1787 const struct pipe_surface *templ) 1788 { 1789 unsigned level = templ->u.tex.level; 1790 unsigned width = u_minify(tex->width0, level); 1791 unsigned height = u_minify(tex->height0, level); 1792 1793 if (tex->target != PIPE_BUFFER && templ->format != tex->format) { 1794 const struct util_format_description *tex_desc 1795 = util_format_description(tex->format); 1796 const struct util_format_description *templ_desc 1797 = util_format_description(templ->format); 1798 1799 assert(tex_desc->block.bits == templ_desc->block.bits); 1800 1801 /* Adjust size of surface if and only if the block width or 1802 * height is changed. 
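 *
 * Worked example (illustrative): viewing a 64x64 PIPE_FORMAT_DXT1_RGB
 * texture (4x4-pixel blocks, 64 bits per block) through an R32G32_UINT
 * template (1x1 blocks, also 64 bits) gives nblks_x = nblks_y = 16, so
 * the surface is created as 16x16 with one "pixel" per compressed block.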
*/ 1803 if (tex_desc->block.width != templ_desc->block.width || 1804 tex_desc->block.height != templ_desc->block.height) { 1805 unsigned nblks_x = util_format_get_nblocksx(tex->format, width); 1806 unsigned nblks_y = util_format_get_nblocksy(tex->format, height); 1807 1808 width = nblks_x * templ_desc->block.width; 1809 height = nblks_y * templ_desc->block.height; 1810 } 1811 } 1812 1813 return r600_create_surface_custom(pipe, tex, templ, width, height); 1814 } 1815 1816 static void r600_surface_destroy(struct pipe_context *pipe, 1817 struct pipe_surface *surface) 1818 { 1819 struct r600_surface *surf = (struct r600_surface*)surface; 1820 r600_resource_reference(&surf->cb_buffer_fmask, NULL); 1821 r600_resource_reference(&surf->cb_buffer_cmask, NULL); 1822 pipe_resource_reference(&surface->texture, NULL); 1823 FREE(surface); 1824 } 1825 1826 static void r600_clear_texture(struct pipe_context *pipe, 1827 struct pipe_resource *tex, 1828 unsigned level, 1829 const struct pipe_box *box, 1830 const void *data) 1831 { 1832 struct pipe_screen *screen = pipe->screen; 1833 struct r600_texture *rtex = (struct r600_texture*)tex; 1834 struct pipe_surface tmpl = {{0}}; 1835 struct pipe_surface *sf; 1836 const struct util_format_description *desc = 1837 util_format_description(tex->format); 1838 1839 tmpl.format = tex->format; 1840 tmpl.u.tex.first_layer = box->z; 1841 tmpl.u.tex.last_layer = box->z + box->depth - 1; 1842 tmpl.u.tex.level = level; 1843 sf = pipe->create_surface(pipe, tex, &tmpl); 1844 if (!sf) 1845 return; 1846 1847 if (rtex->is_depth) { 1848 unsigned clear; 1849 float depth; 1850 uint8_t stencil = 0; 1851 1852 /* Depth is always present. */ 1853 clear = PIPE_CLEAR_DEPTH; 1854 desc->unpack_z_float(&depth, 0, data, 0, 1, 1); 1855 1856 if (rtex->surface.flags & RADEON_SURF_SBUFFER) { 1857 clear |= PIPE_CLEAR_STENCIL; 1858 desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1); 1859 } 1860 1861 pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil, 1862 box->x, box->y, 1863 box->width, box->height, false); 1864 } else { 1865 union pipe_color_union color; 1866 1867 /* pipe_color_union requires the full vec4 representation. 
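 * The clear value in `data' is a single texel packed in the resource
 * format (as specified for pipe_context::clear_texture), so it has to be
 * expanded to all four components here; e.g. for R8_UNORM the unpack
 * typically yields (r, 0, 0, 1) before the value reaches the clear path.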
*/ 1868 if (util_format_is_pure_uint(tex->format)) 1869 desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1); 1870 else if (util_format_is_pure_sint(tex->format)) 1871 desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1); 1872 else 1873 desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1); 1874 1875 if (screen->is_format_supported(screen, tex->format, 1876 tex->target, 0, 1877 PIPE_BIND_RENDER_TARGET)) { 1878 pipe->clear_render_target(pipe, sf, &color, 1879 box->x, box->y, 1880 box->width, box->height, false); 1881 } else { 1882 /* Software fallback - just for R9G9B9E5_FLOAT */ 1883 util_clear_render_target(pipe, sf, &color, 1884 box->x, box->y, 1885 box->width, box->height); 1886 } 1887 } 1888 pipe_surface_reference(&sf, NULL); 1889 } 1890 1891 unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap) 1892 { 1893 const struct util_format_description *desc = util_format_description(format); 1894 1895 #define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz) 1896 1897 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1898 return V_0280A0_SWAP_STD; 1899 1900 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1901 return ~0U; 1902 1903 switch (desc->nr_channels) { 1904 case 1: 1905 if (HAS_SWIZZLE(0,X)) 1906 return V_0280A0_SWAP_STD; /* X___ */ 1907 else if (HAS_SWIZZLE(3,X)) 1908 return V_0280A0_SWAP_ALT_REV; /* ___X */ 1909 break; 1910 case 2: 1911 if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) || 1912 (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) || 1913 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y))) 1914 return V_0280A0_SWAP_STD; /* XY__ */ 1915 else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) || 1916 (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) || 1917 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X))) 1918 /* YX__ */ 1919 return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV); 1920 else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y)) 1921 return V_0280A0_SWAP_ALT; /* X__Y */ 1922 else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X)) 1923 return V_0280A0_SWAP_ALT_REV; /* Y__X */ 1924 break; 1925 case 3: 1926 if (HAS_SWIZZLE(0,X)) 1927 return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD); 1928 else if (HAS_SWIZZLE(0,Z)) 1929 return V_0280A0_SWAP_STD_REV; /* ZYX */ 1930 break; 1931 case 4: 1932 /* check the middle channels, the 1st and 4th channel can be NONE */ 1933 if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) { 1934 return V_0280A0_SWAP_STD; /* XYZW */ 1935 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) { 1936 return V_0280A0_SWAP_STD_REV; /* WZYX */ 1937 } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) { 1938 return V_0280A0_SWAP_ALT; /* ZYXW */ 1939 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) { 1940 /* YZWX */ 1941 if (desc->is_array) 1942 return V_0280A0_SWAP_ALT_REV; 1943 else 1944 return (do_endian_swap ? 
V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV); 1945 } 1946 break; 1947 } 1948 return ~0U; 1949 } 1950 1951 /* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */ 1952 1953 static void vi_dcc_clean_up_context_slot(struct r600_common_context *rctx, 1954 int slot) 1955 { 1956 int i; 1957 1958 if (rctx->dcc_stats[slot].query_active) 1959 vi_separate_dcc_stop_query(&rctx->b, 1960 rctx->dcc_stats[slot].tex); 1961 1962 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats[slot].ps_stats); i++) 1963 if (rctx->dcc_stats[slot].ps_stats[i]) { 1964 rctx->b.destroy_query(&rctx->b, 1965 rctx->dcc_stats[slot].ps_stats[i]); 1966 rctx->dcc_stats[slot].ps_stats[i] = NULL; 1967 } 1968 1969 r600_texture_reference(&rctx->dcc_stats[slot].tex, NULL); 1970 } 1971 1972 /** 1973 * Return the per-context slot where DCC statistics queries for the texture live. 1974 */ 1975 static unsigned vi_get_context_dcc_stats_index(struct r600_common_context *rctx, 1976 struct r600_texture *tex) 1977 { 1978 int i, empty_slot = -1; 1979 1980 /* Remove zombie textures (textures kept alive by this array only). */ 1981 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) 1982 if (rctx->dcc_stats[i].tex && 1983 rctx->dcc_stats[i].tex->resource.b.b.reference.count == 1) 1984 vi_dcc_clean_up_context_slot(rctx, i); 1985 1986 /* Find the texture. */ 1987 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) { 1988 /* Return if found. */ 1989 if (rctx->dcc_stats[i].tex == tex) { 1990 rctx->dcc_stats[i].last_use_timestamp = os_time_get(); 1991 return i; 1992 } 1993 1994 /* Record the first seen empty slot. */ 1995 if (empty_slot == -1 && !rctx->dcc_stats[i].tex) 1996 empty_slot = i; 1997 } 1998 1999 /* Not found. Remove the oldest member to make space in the array. */ 2000 if (empty_slot == -1) { 2001 int oldest_slot = 0; 2002 2003 /* Find the oldest slot. */ 2004 for (i = 1; i < ARRAY_SIZE(rctx->dcc_stats); i++) 2005 if (rctx->dcc_stats[oldest_slot].last_use_timestamp > 2006 rctx->dcc_stats[i].last_use_timestamp) 2007 oldest_slot = i; 2008 2009 /* Clean up the oldest slot. */ 2010 vi_dcc_clean_up_context_slot(rctx, oldest_slot); 2011 empty_slot = oldest_slot; 2012 } 2013 2014 /* Add the texture to the new slot. */ 2015 r600_texture_reference(&rctx->dcc_stats[empty_slot].tex, tex); 2016 rctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get(); 2017 return empty_slot; 2018 } 2019 2020 static struct pipe_query * 2021 vi_create_resuming_pipestats_query(struct pipe_context *ctx) 2022 { 2023 struct r600_query_hw *query = (struct r600_query_hw*) 2024 ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0); 2025 2026 query->flags |= R600_QUERY_HW_FLAG_BEGIN_RESUMES; 2027 return (struct pipe_query*)query; 2028 } 2029 2030 /** 2031 * Called when binding a color buffer. 2032 */ 2033 void vi_separate_dcc_start_query(struct pipe_context *ctx, 2034 struct r600_texture *tex) 2035 { 2036 struct r600_common_context *rctx = (struct r600_common_context*)ctx; 2037 unsigned i = vi_get_context_dcc_stats_index(rctx, tex); 2038 2039 assert(!rctx->dcc_stats[i].query_active); 2040 2041 if (!rctx->dcc_stats[i].ps_stats[0]) 2042 rctx->dcc_stats[i].ps_stats[0] = vi_create_resuming_pipestats_query(ctx); 2043 2044 /* begin or resume the query */ 2045 ctx->begin_query(ctx, rctx->dcc_stats[i].ps_stats[0]); 2046 rctx->dcc_stats[i].query_active = true; 2047 } 2048 2049 /** 2050 * Called when unbinding a color buffer. 
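 *
 * This pairs with vi_separate_dcc_start_query(): framebuffer-state code
 * would typically stop the query on the color buffer being unbound and
 * start one on its replacement, so ps_stats[0] only accumulates samples
 * while the texture is actually bound as a render target.  Roughly
 * (illustrative sketch, not the real set_framebuffer_state code):
 *
 *   if (old_tex && old_tex->dcc_gather_statistics)
 *           vi_separate_dcc_stop_query(ctx, old_tex);
 *   if (new_tex && new_tex->dcc_gather_statistics)
 *           vi_separate_dcc_start_query(ctx, new_tex);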
2051 */ 2052 void vi_separate_dcc_stop_query(struct pipe_context *ctx, 2053 struct r600_texture *tex) 2054 { 2055 struct r600_common_context *rctx = (struct r600_common_context*)ctx; 2056 unsigned i = vi_get_context_dcc_stats_index(rctx, tex); 2057 2058 assert(rctx->dcc_stats[i].query_active); 2059 assert(rctx->dcc_stats[i].ps_stats[0]); 2060 2061 /* pause or end the query */ 2062 ctx->end_query(ctx, rctx->dcc_stats[i].ps_stats[0]); 2063 rctx->dcc_stats[i].query_active = false; 2064 } 2065 2066 static bool vi_should_enable_separate_dcc(struct r600_texture *tex) 2067 { 2068 /* The minimum number of fullscreen draws per frame that is required 2069 * to enable DCC. */ 2070 return tex->ps_draw_ratio + tex->num_slow_clears >= 5; 2071 } 2072 2073 /* Called by fast clear. */ 2074 static void vi_separate_dcc_try_enable(struct r600_common_context *rctx, 2075 struct r600_texture *tex) 2076 { 2077 /* The intent is to use this with shared displayable back buffers, 2078 * but it's not strictly limited only to them. 2079 */ 2080 if (!tex->resource.is_shared || 2081 !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) || 2082 tex->resource.b.b.target != PIPE_TEXTURE_2D || 2083 tex->resource.b.b.last_level > 0 || 2084 !tex->surface.dcc_size) 2085 return; 2086 2087 if (tex->dcc_offset) 2088 return; /* already enabled */ 2089 2090 /* Enable the DCC stat gathering. */ 2091 if (!tex->dcc_gather_statistics) { 2092 tex->dcc_gather_statistics = true; 2093 vi_separate_dcc_start_query(&rctx->b, tex); 2094 } 2095 2096 if (!vi_should_enable_separate_dcc(tex)) 2097 return; /* stats show that DCC decompression is too expensive */ 2098 2099 assert(tex->surface.num_dcc_levels); 2100 assert(!tex->dcc_separate_buffer); 2101 2102 r600_texture_discard_cmask(rctx->screen, tex); 2103 2104 /* Get a DCC buffer. */ 2105 if (tex->last_dcc_separate_buffer) { 2106 assert(tex->dcc_gather_statistics); 2107 assert(!tex->dcc_separate_buffer); 2108 tex->dcc_separate_buffer = tex->last_dcc_separate_buffer; 2109 tex->last_dcc_separate_buffer = NULL; 2110 } else { 2111 tex->dcc_separate_buffer = (struct r600_resource*) 2112 r600_aligned_buffer_create(rctx->b.screen, 0, 2113 PIPE_USAGE_DEFAULT, 2114 tex->surface.dcc_size, 2115 tex->surface.dcc_alignment); 2116 if (!tex->dcc_separate_buffer) 2117 return; 2118 } 2119 2120 /* dcc_offset is the absolute GPUVM address. */ 2121 tex->dcc_offset = tex->dcc_separate_buffer->gpu_address; 2122 2123 /* no need to flag anything since this is called by fast clear that 2124 * flags framebuffer state 2125 */ 2126 } 2127 2128 /** 2129 * Called by pipe_context::flush_resource, the place where DCC decompression 2130 * takes place. 2131 */ 2132 void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, 2133 struct r600_texture *tex) 2134 { 2135 struct r600_common_context *rctx = (struct r600_common_context*)ctx; 2136 struct pipe_query *tmp; 2137 unsigned i = vi_get_context_dcc_stats_index(rctx, tex); 2138 bool query_active = rctx->dcc_stats[i].query_active; 2139 bool disable = false; 2140 2141 if (rctx->dcc_stats[i].ps_stats[2]) { 2142 union pipe_query_result result; 2143 2144 /* Read the results. */ 2145 ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2], 2146 true, &result); 2147 r600_query_hw_reset_buffers(rctx, 2148 (struct r600_query_hw*) 2149 rctx->dcc_stats[i].ps_stats[2]); 2150 2151 /* Compute the approximate number of fullscreen draws. 
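 *
 * Illustrative arithmetic: a 1920x1080 surface has 2,073,600 pixels, so
 * about 10.4M ps_invocations in the measurement window yield a ratio of
 * 5, which is enough to pass the threshold tested in
 * vi_should_enable_separate_dcc().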
*/ 2152 tex->ps_draw_ratio = 2153 result.pipeline_statistics.ps_invocations / 2154 (tex->resource.b.b.width0 * tex->resource.b.b.height0); 2155 rctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio; 2156 2157 disable = tex->dcc_separate_buffer && 2158 !vi_should_enable_separate_dcc(tex); 2159 } 2160 2161 tex->num_slow_clears = 0; 2162 2163 /* stop the statistics query for ps_stats[0] */ 2164 if (query_active) 2165 vi_separate_dcc_stop_query(ctx, tex); 2166 2167 /* Move the queries in the queue by one. */ 2168 tmp = rctx->dcc_stats[i].ps_stats[2]; 2169 rctx->dcc_stats[i].ps_stats[2] = rctx->dcc_stats[i].ps_stats[1]; 2170 rctx->dcc_stats[i].ps_stats[1] = rctx->dcc_stats[i].ps_stats[0]; 2171 rctx->dcc_stats[i].ps_stats[0] = tmp; 2172 2173 /* create and start a new query as ps_stats[0] */ 2174 if (query_active) 2175 vi_separate_dcc_start_query(ctx, tex); 2176 2177 if (disable) { 2178 assert(!tex->last_dcc_separate_buffer); 2179 tex->last_dcc_separate_buffer = tex->dcc_separate_buffer; 2180 tex->dcc_separate_buffer = NULL; 2181 tex->dcc_offset = 0; 2182 /* no need to flag anything since this is called after 2183 * decompression that re-sets framebuffer state 2184 */ 2185 } 2186 } 2187 2188 /* FAST COLOR CLEAR */ 2189 2190 static void evergreen_set_clear_color(struct r600_texture *rtex, 2191 enum pipe_format surface_format, 2192 const union pipe_color_union *color) 2193 { 2194 union util_color uc; 2195 2196 memset(&uc, 0, sizeof(uc)); 2197 2198 if (rtex->surface.bpe == 16) { 2199 /* DCC fast clear only: 2200 * CLEAR_WORD0 = R = G = B 2201 * CLEAR_WORD1 = A 2202 */ 2203 assert(color->ui[0] == color->ui[1] && 2204 color->ui[0] == color->ui[2]); 2205 uc.ui[0] = color->ui[0]; 2206 uc.ui[1] = color->ui[3]; 2207 } else if (util_format_is_pure_uint(surface_format)) { 2208 util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1); 2209 } else if (util_format_is_pure_sint(surface_format)) { 2210 util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1); 2211 } else { 2212 util_pack_color(color->f, surface_format, &uc); 2213 } 2214 2215 memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); 2216 } 2217 2218 static bool vi_get_fast_clear_parameters(enum pipe_format surface_format, 2219 const union pipe_color_union *color, 2220 uint32_t* reset_value, 2221 bool* clear_words_needed) 2222 { 2223 bool values[4] = {}; 2224 int i; 2225 bool main_value = false; 2226 bool extra_value = false; 2227 int extra_channel; 2228 const struct util_format_description *desc = util_format_description(surface_format); 2229 2230 if (desc->block.bits == 128 && 2231 (color->ui[0] != color->ui[1] || 2232 color->ui[0] != color->ui[2])) 2233 return false; 2234 2235 *clear_words_needed = true; 2236 *reset_value = 0x20202020U; 2237 2238 /* If we want to clear without needing a fast clear eliminate step, we 2239 * can set each channel to 0 or 1 (or 0/max for integer formats). We 2240 * have two sets of flags, one for the last or first channel(extra) and 2241 * one for the other channels(main). 
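 *
 * Worked example (illustrative): clearing an R8G8B8A8_UNORM target to
 * (0, 0, 0, 1) gives main_value = 0 and extra_value = 1, so the DCC words
 * become 0x20202020 | 0x40404040 and *clear_words_needed ends up false;
 * clearing to e.g. (0.5, 0, 0, 1) returns early below, keeping the
 * default 0x20202020 with *clear_words_needed = true, i.e. a fast clear
 * that still requires the eliminate pass.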
2242 */ 2243 2244 if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT || 2245 surface_format == PIPE_FORMAT_B5G6R5_UNORM || 2246 surface_format == PIPE_FORMAT_B5G6R5_SRGB) { 2247 extra_channel = -1; 2248 } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { 2249 if(r600_translate_colorswap(surface_format, false) <= 1) 2250 extra_channel = desc->nr_channels - 1; 2251 else 2252 extra_channel = 0; 2253 } else 2254 return true; 2255 2256 for (i = 0; i < 4; ++i) { 2257 int index = desc->swizzle[i] - PIPE_SWIZZLE_X; 2258 2259 if (desc->swizzle[i] < PIPE_SWIZZLE_X || 2260 desc->swizzle[i] > PIPE_SWIZZLE_W) 2261 continue; 2262 2263 if (desc->channel[i].pure_integer && 2264 desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2265 /* Use the maximum value for clamping the clear color. */ 2266 int max = u_bit_consecutive(0, desc->channel[i].size - 1); 2267 2268 values[i] = color->i[i] != 0; 2269 if (color->i[i] != 0 && MIN2(color->i[i], max) != max) 2270 return true; 2271 } else if (desc->channel[i].pure_integer && 2272 desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2273 /* Use the maximum value for clamping the clear color. */ 2274 unsigned max = u_bit_consecutive(0, desc->channel[i].size); 2275 2276 values[i] = color->ui[i] != 0U; 2277 if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max) 2278 return true; 2279 } else { 2280 values[i] = color->f[i] != 0.0F; 2281 if (color->f[i] != 0.0F && color->f[i] != 1.0F) 2282 return true; 2283 } 2284 2285 if (index == extra_channel) 2286 extra_value = values[i]; 2287 else 2288 main_value = values[i]; 2289 } 2290 2291 for (int i = 0; i < 4; ++i) 2292 if (values[i] != main_value && 2293 desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel && 2294 desc->swizzle[i] >= PIPE_SWIZZLE_X && 2295 desc->swizzle[i] <= PIPE_SWIZZLE_W) 2296 return true; 2297 2298 *clear_words_needed = false; 2299 if (main_value) 2300 *reset_value |= 0x80808080U; 2301 2302 if (extra_value) 2303 *reset_value |= 0x40404040U; 2304 return true; 2305 } 2306 2307 void vi_dcc_clear_level(struct r600_common_context *rctx, 2308 struct r600_texture *rtex, 2309 unsigned level, unsigned clear_value) 2310 { 2311 struct pipe_resource *dcc_buffer; 2312 uint64_t dcc_offset; 2313 2314 assert(rtex->dcc_offset && level < rtex->surface.num_dcc_levels); 2315 2316 if (rtex->dcc_separate_buffer) { 2317 dcc_buffer = &rtex->dcc_separate_buffer->b.b; 2318 dcc_offset = 0; 2319 } else { 2320 dcc_buffer = &rtex->resource.b.b; 2321 dcc_offset = rtex->dcc_offset; 2322 } 2323 2324 dcc_offset += rtex->surface.level[level].dcc_offset; 2325 2326 rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, 2327 rtex->surface.level[level].dcc_fast_clear_size, 2328 clear_value, R600_COHERENCY_CB_META); 2329 } 2330 2331 /* Set the same micro tile mode as the destination of the last MSAA resolve. 2332 * This allows hitting the MSAA resolve fast path, which requires that both 2333 * src and dst micro tile modes match. 2334 */ 2335 static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen, 2336 struct r600_texture *rtex) 2337 { 2338 if (rtex->resource.is_shared || 2339 rtex->resource.b.b.nr_samples <= 1 || 2340 rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode) 2341 return; 2342 2343 assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D); 2344 assert(rtex->resource.b.b.last_level == 0); 2345 2346 /* These magic numbers were copied from addrlib. It doesn't use any 2347 * definitions for them either. They are all 2D_TILED_THIN1 modes with 2348 * different bpp and micro tile mode. 
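 *
 * In short: on CIK and newer the index written below depends only on the
 * micro tile mode (10 = display, 14 = thin, 28 = rotated), while on SI it
 * is additionally selected by bytes-per-element, as the nested switch
 * shows.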
2349 */ 2350 if (rscreen->chip_class >= CIK) { 2351 switch (rtex->last_msaa_resolve_target_micro_mode) { 2352 case RADEON_MICRO_MODE_DISPLAY: 2353 rtex->surface.tiling_index[0] = 10; 2354 break; 2355 case RADEON_MICRO_MODE_THIN: 2356 rtex->surface.tiling_index[0] = 14; 2357 break; 2358 case RADEON_MICRO_MODE_ROTATED: 2359 rtex->surface.tiling_index[0] = 28; 2360 break; 2361 default: /* depth, thick */ 2362 assert(!"unexpected micro mode"); 2363 return; 2364 } 2365 } else { /* SI */ 2366 switch (rtex->last_msaa_resolve_target_micro_mode) { 2367 case RADEON_MICRO_MODE_DISPLAY: 2368 switch (rtex->surface.bpe) { 2369 case 1: 2370 rtex->surface.tiling_index[0] = 10; 2371 break; 2372 case 2: 2373 rtex->surface.tiling_index[0] = 11; 2374 break; 2375 default: /* 4, 8 */ 2376 rtex->surface.tiling_index[0] = 12; 2377 break; 2378 } 2379 break; 2380 case RADEON_MICRO_MODE_THIN: 2381 switch (rtex->surface.bpe) { 2382 case 1: 2383 rtex->surface.tiling_index[0] = 14; 2384 break; 2385 case 2: 2386 rtex->surface.tiling_index[0] = 15; 2387 break; 2388 case 4: 2389 rtex->surface.tiling_index[0] = 16; 2390 break; 2391 default: /* 8, 16 */ 2392 rtex->surface.tiling_index[0] = 17; 2393 break; 2394 } 2395 break; 2396 default: /* depth, thick */ 2397 assert(!"unexpected micro mode"); 2398 return; 2399 } 2400 } 2401 2402 rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode; 2403 2404 p_atomic_inc(&rscreen->dirty_fb_counter); 2405 p_atomic_inc(&rscreen->dirty_tex_descriptor_counter); 2406 } 2407 2408 void evergreen_do_fast_color_clear(struct r600_common_context *rctx, 2409 struct pipe_framebuffer_state *fb, 2410 struct r600_atom *fb_state, 2411 unsigned *buffers, unsigned *dirty_cbufs, 2412 const union pipe_color_union *color) 2413 { 2414 int i; 2415 2416 /* This function is broken in BE, so just disable this path for now */ 2417 #ifdef PIPE_ARCH_BIG_ENDIAN 2418 return; 2419 #endif 2420 2421 if (rctx->render_cond) 2422 return; 2423 2424 for (i = 0; i < fb->nr_cbufs; i++) { 2425 struct r600_texture *tex; 2426 unsigned clear_bit = PIPE_CLEAR_COLOR0 << i; 2427 2428 if (!fb->cbufs[i]) 2429 continue; 2430 2431 /* if this colorbuffer is not being cleared */ 2432 if (!(*buffers & clear_bit)) 2433 continue; 2434 2435 tex = (struct r600_texture *)fb->cbufs[i]->texture; 2436 2437 /* the clear is allowed if all layers are bound */ 2438 if (fb->cbufs[i]->u.tex.first_layer != 0 || 2439 fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) { 2440 continue; 2441 } 2442 2443 /* cannot clear mipmapped textures */ 2444 if (fb->cbufs[i]->texture->last_level != 0) { 2445 continue; 2446 } 2447 2448 /* only supported on tiled surfaces */ 2449 if (tex->surface.is_linear) { 2450 continue; 2451 } 2452 2453 /* shared textures can't use fast clear without an explicit flush, 2454 * because there is no way to communicate the clear color among 2455 * all clients 2456 */ 2457 if (tex->resource.is_shared && 2458 !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) 2459 continue; 2460 2461 /* fast color clear with 1D tiling doesn't work on old kernels and CIK */ 2462 if (rctx->chip_class == CIK && 2463 tex->surface.level[0].mode == RADEON_SURF_MODE_1D && 2464 rctx->screen->info.drm_major == 2 && 2465 rctx->screen->info.drm_minor < 38) { 2466 continue; 2467 } 2468 2469 /* Fast clear is the most appropriate place to enable DCC for 2470 * displayable surfaces. 
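 * If vi_separate_dcc_try_enable() switches DCC on at this point, the
 * freshly allocated metadata can be put into a defined state right away
 * by the DCC clear path further down (vi_dcc_clear_level), since the
 * fast clear rewrites the whole level anyway.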
2471 */ 2472 if (rctx->chip_class >= VI && 2473 !(rctx->screen->debug_flags & DBG_NO_DCC_FB)) { 2474 vi_separate_dcc_try_enable(rctx, tex); 2475 2476 /* Stoney can't do a CMASK-based clear, so all clears are 2477 * considered to be hypothetically slow clears, which 2478 * is weighed when determining to enable separate DCC. 2479 */ 2480 if (tex->dcc_gather_statistics && 2481 rctx->family == CHIP_STONEY) 2482 tex->num_slow_clears++; 2483 } 2484 2485 /* Try to clear DCC first, otherwise try CMASK. */ 2486 if (tex->dcc_offset && tex->surface.num_dcc_levels) { 2487 uint32_t reset_value; 2488 bool clear_words_needed; 2489 2490 if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR) 2491 continue; 2492 2493 if (!vi_get_fast_clear_parameters(fb->cbufs[i]->format, 2494 color, &reset_value, 2495 &clear_words_needed)) 2496 continue; 2497 2498 vi_dcc_clear_level(rctx, tex, 0, reset_value); 2499 2500 if (clear_words_needed) 2501 tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; 2502 tex->separate_dcc_dirty = true; 2503 } else { 2504 /* 128-bit formats are unsupported */ 2505 if (tex->surface.bpe > 8) { 2506 continue; 2507 } 2508 2509 /* Stoney/RB+ doesn't work with CMASK fast clear. */ 2510 if (rctx->family == CHIP_STONEY) 2511 continue; 2512 2513 /* ensure CMASK is enabled */ 2514 r600_texture_alloc_cmask_separate(rctx->screen, tex); 2515 if (tex->cmask.size == 0) { 2516 continue; 2517 } 2518 2519 /* Do the fast clear. */ 2520 rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, 2521 tex->cmask.offset, tex->cmask.size, 0, 2522 R600_COHERENCY_CB_META); 2523 2524 tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; 2525 } 2526 2527 /* We can change the micro tile mode before a full clear. */ 2528 if (rctx->screen->chip_class >= SI) 2529 si_set_optimal_micro_tile_mode(rctx->screen, tex); 2530 2531 evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); 2532 2533 if (dirty_cbufs) 2534 *dirty_cbufs |= 1 << i; 2535 rctx->set_atom_dirty(rctx, fb_state, true); 2536 *buffers &= ~clear_bit; 2537 } 2538 } 2539 2540 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen) 2541 { 2542 rscreen->b.resource_from_handle = r600_texture_from_handle; 2543 rscreen->b.resource_get_handle = r600_texture_get_handle; 2544 } 2545 2546 void r600_init_context_texture_functions(struct r600_common_context *rctx) 2547 { 2548 rctx->b.create_surface = r600_create_surface; 2549 rctx->b.surface_destroy = r600_surface_destroy; 2550 rctx->b.clear_texture = r600_clear_texture; 2551 } 2552
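/*
 * Usage sketch (illustrative only; the real call sites live in the common
 * screen/context setup code, not in this file): a driver built on this
 * common layer would typically install the hooks above while creating its
 * screen and context, roughly like
 *
 *   r600_init_screen_texture_functions(rscreen);
 *   r600_init_context_texture_functions(rctx);
 *
 * after which pipe_screen::resource_from_handle / resource_get_handle and
 * pipe_context::create_surface / surface_destroy / clear_texture dispatch
 * to the implementations in this file.
 */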