/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_format.h"
#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/set.h"

#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

#include <errno.h>

/* XXX this should go away, needed for 'struct winsys_handle' */
#include "state_tracker/drm_driver.h"

/**
 * Go through the entire state and see if the resource is bound
 * anywhere. If it is, mark the relevant state as dirty. This is
 * called on realloc_bo to ensure the necessary state is re-
 * emitted so the GPU looks at the new backing bo.
 */
static void
rebind_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
	/* VBOs */
	for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
		if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc)
			ctx->dirty |= FD_DIRTY_VTXBUF;
	}

	/* per-shader-stage resources: */
	for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
		/* Constbufs.. note that constbuf[0] is normal uniforms emitted in
		 * cmdstream rather than by pointer..
		 */
		const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
		for (unsigned i = 1; i < num_ubos; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
				break;
			if (ctx->constbuf[stage].cb[i].buffer == prsc)
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
		}

		/* Textures */
		for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
				break;
			if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
		}

		/* SSBOs */
		const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask);
		for (unsigned i = 0; i < num_ssbos; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)
				break;
			if (ctx->shaderbuf[stage].sb[i].buffer == prsc)
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO;
		}
	}
}

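/* Replace the backing bo with a freshly allocated one. Note that
 * dropping the last userspace reference with fd_bo_del() should be
 * safe even if the GPU is still using the old bo, since the kernel
 * keeps the GEM object alive until in-flight submits referencing it
 * have retired.
 */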
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
	struct fd_screen *screen = fd_screen(rsc->base.screen);
	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
			DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */

	/* if we start using things other than write-combine,
	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
	 */

	if (rsc->bo)
		fd_bo_del(rsc->bo);

	rsc->bo = fd_bo_new(screen->dev, size, flags);
	util_range_set_empty(&rsc->valid_buffer_range);
	fd_bc_invalidate_resource(rsc, true);
}

static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
{
	/* TODO size threshold too?? */
	if (!fallback) {
		/* do blit on gpu: */
		fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
		ctx->blit(ctx, blit);
		fd_blitter_pipe_end(ctx);
	} else {
		/* do blit on cpu: */
		util_resource_copy_region(&ctx->base,
				blit->dst.resource, blit->dst.level, blit->dst.box.x,
				blit->dst.box.y, blit->dst.box.z,
				blit->src.resource, blit->src.level, &blit->src.box);
	}
}

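/**
 * Try to avoid a flush/stall on a busy resource by "shadowing" it:
 * swap in a freshly allocated backing bo, and back-blit the still-valid
 * contents from the old bo (which lives on as the "shadow"), so pending
 * GPU work keeps operating on the old storage while the CPU gets the
 * new one. level/box describe the region the caller is about to
 * overwrite, which is exactly the part that does not get copied back.
 */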
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource *prsc = &rsc->base;
	bool fallback = false;

	if (prsc->next)
		return false;

	/* TODO: somehow munge dimensions and format to copy unsupported
	 * render target format to something that is supported?
	 */
	if (!pctx->screen->is_format_supported(pctx->screen,
			prsc->format, prsc->target, prsc->nr_samples,
			PIPE_BIND_RENDER_TARGET))
		fallback = true;

	/* do shadowing back-blits on the cpu for buffers: */
	if (prsc->target == PIPE_BUFFER)
		fallback = true;

	bool whole_level = util_texrange_covers_whole_level(prsc, level,
		box->x, box->y, box->z, box->width, box->height, box->depth);

	/* TODO need to be more clever about current level */
	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
		return false;

	struct pipe_resource *pshadow =
		pctx->screen->resource_create(pctx->screen, prsc);

	if (!pshadow)
		return false;

	assert(!ctx->in_shadow);
	ctx->in_shadow = true;

	/* get rid of any references that batch-cache might have to us (which
	 * should empty/destroy rsc->batches hashset)
	 */
	fd_bc_invalidate_resource(rsc, false);

	mtx_lock(&ctx->screen->lock);

	/* Swap the backing bo's, so shadow becomes the old buffer,
	 * blit from shadow to new buffer. From here on out, we
	 * cannot fail.
	 *
	 * Note that we need to do it in this order, otherwise if
	 * we go down cpu blit path, the recursive transfer_map()
	 * sees the wrong status..
	 */
	struct fd_resource *shadow = fd_resource(pshadow);

	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.reference.count,
			shadow, shadow->base.reference.count);

	/* TODO valid_buffer_range?? */
	swap(rsc->bo, shadow->bo);
	swap(rsc->write_batch, shadow->write_batch);

	/* at this point, the newly created shadow buffer is not referenced
	 * by any batches, but the existing rsc (probably) is. We need to
	 * transfer those references over:
	 */
	debug_assert(shadow->batch_mask == 0);
	struct fd_batch *batch;
	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
		_mesa_set_remove(batch->resources, entry);
		_mesa_set_add(batch->resources, shadow);
	}
	swap(rsc->batch_mask, shadow->batch_mask);

	mtx_unlock(&ctx->screen->lock);

	struct pipe_blit_info blit = {0};
	blit.dst.resource = prsc;
	blit.dst.format = prsc->format;
	blit.src.resource = pshadow;
	blit.src.format = pshadow->format;
	blit.mask = util_format_get_mask(prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

#define set_box(field, val) do {     \
		blit.dst.field = (val);      \
		blit.src.field = (val);      \
	} while (0)

	/* blit the other levels in their entirety: */
	for (unsigned l = 0; l <= prsc->last_level; l++) {
		if (l == level)
			continue;

		/* just blit whole level: */
		set_box(level, l);
		set_box(box.width, u_minify(prsc->width0, l));
		set_box(box.height, u_minify(prsc->height0, l));
		set_box(box.depth, u_minify(prsc->depth0, l));

		do_blit(ctx, &blit, fallback);
	}

	/* deal w/ current level specially, since we might need to split
	 * it up into a couple blits:
	 */
	if (!whole_level) {
		set_box(level, level);

		switch (prsc->target) {
		case PIPE_BUFFER:
		case PIPE_TEXTURE_1D:
			set_box(box.y, 0);
			set_box(box.z, 0);
			set_box(box.height, 1);
			set_box(box.depth, 1);

			if (box->x > 0) {
				set_box(box.x, 0);
				set_box(box.width, box->x);

				do_blit(ctx, &blit, fallback);
			}
			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
				set_box(box.x, box->x + box->width);
				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));

				do_blit(ctx, &blit, fallback);
			}
			break;
		case PIPE_TEXTURE_2D:
			/* TODO */
		default:
			unreachable("TODO");
		}
	}

	ctx->in_shadow = false;

	pipe_resource_reference(&pshadow, NULL);

	return true;
}

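/* To illustrate the split above with some made-up numbers: discarding
 * bytes [64, 192) of a 256-byte PIPE_BUFFER via shadowing results in
 * two back-blits from the shadow, [0, 64) and [192, 256); the discarded
 * middle range is simply left for the caller to overwrite.
 */
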
static struct fd_resource *
fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource tmpl = rsc->base;

	tmpl.width0 = box->width;
	tmpl.height0 = box->height;
	tmpl.depth0 = box->depth;
	tmpl.array_size = 1;
	tmpl.last_level = 0;
	tmpl.bind |= PIPE_BIND_LINEAR;

	struct pipe_resource *pstaging =
		pctx->screen->resource_create(pctx->screen, &tmpl);
	if (!pstaging)
		return NULL;

	return fd_resource(pstaging);
}

static void
fd_blit_from_staging(struct fd_context *ctx, struct fd_transfer *trans)
{
	struct pipe_resource *dst = trans->base.resource;
	struct pipe_blit_info blit = {0};

	blit.dst.resource = dst;
	blit.dst.format = dst->format;
	blit.dst.level = trans->base.level;
	blit.dst.box = trans->base.box;
	blit.src.resource = trans->staging_prsc;
	blit.src.format = trans->staging_prsc->format;
	blit.src.level = 0;
	blit.src.box = trans->staging_box;
	blit.mask = util_format_get_mask(trans->staging_prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

	do_blit(ctx, &blit, false);
}

static void
fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans)
{
	struct pipe_resource *src = trans->base.resource;
	struct pipe_blit_info blit = {0};

	blit.src.resource = src;
	blit.src.format = src->format;
	blit.src.level = trans->base.level;
	blit.src.box = trans->base.box;
	blit.dst.resource = trans->staging_prsc;
	blit.dst.format = trans->staging_prsc->format;
	blit.dst.level = 0;
	blit.dst.box = trans->staging_box;
	blit.mask = util_format_get_mask(trans->staging_prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

	do_blit(ctx, &blit, false);
}

static unsigned
fd_resource_layer_offset(struct fd_resource *rsc,
		struct fd_resource_slice *slice,
		unsigned layer)
{
	if (rsc->layer_first)
		return layer * rsc->layer_size;
	else
		return layer * slice->size0;
}

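/* The two layouts handled above, illustrated for a 2-layer resource
 * with miplevels L0..Ln:
 *
 *   layer_first=false:  L0: [layer0 | layer1]  L1: [layer0 | layer1] ...
 *   layer_first=true:   layer0: [L0 | L1 | ...]  layer1: [L0 | L1 | ...]
 *
 * In the layer_first case every layer spans rsc->layer_size bytes,
 * while otherwise layers within a level are strided by that level's
 * slice->size0.
 */
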
static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
		struct pipe_transfer *ptrans,
		const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(ptrans->resource);

	if (ptrans->resource->target == PIPE_BUFFER)
		util_range_add(&rsc->valid_buffer_range,
				ptrans->box.x + box->x,
				ptrans->box.x + box->x + box->width);
}

static void
flush_resource(struct fd_context *ctx, struct fd_resource *rsc, unsigned usage)
{
	struct fd_batch *write_batch = NULL;

	fd_batch_reference(&write_batch, rsc->write_batch);

	if (usage & PIPE_TRANSFER_WRITE) {
		struct fd_batch *batch, *batches[32] = {0};
		uint32_t batch_mask;

		/* This is a bit awkward, probably a fd_batch_flush_locked()
		 * would make things simpler.. but we need to hold the lock
		 * to iterate the batches which reference this resource. So
		 * we must first grab references under a lock, then flush.
		 */
		mtx_lock(&ctx->screen->lock);
		batch_mask = rsc->batch_mask;
		foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
			fd_batch_reference(&batches[batch->idx], batch);
		mtx_unlock(&ctx->screen->lock);

		foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
			fd_batch_flush(batch, false, false);

		foreach_batch(batch, &ctx->screen->batch_cache, batch_mask) {
			fd_batch_sync(batch);
			fd_batch_reference(&batches[batch->idx], NULL);
		}
		assert(rsc->batch_mask == 0);
	} else if (write_batch) {
		fd_batch_flush(write_batch, true, false);
	}

	fd_batch_reference(&write_batch, NULL);

	assert(!rsc->write_batch);
}

static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	flush_resource(fd_context(pctx), fd_resource(prsc), PIPE_TRANSFER_READ);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
		struct pipe_transfer *ptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (trans->staging_prsc) {
		if (ptrans->usage & PIPE_TRANSFER_WRITE)
			fd_blit_from_staging(ctx, trans);
		pipe_resource_reference(&trans->staging_prsc, NULL);
	}

	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		fd_bo_cpu_fini(rsc->bo);
	}

	util_range_add(&rsc->valid_buffer_range,
			ptrans->box.x,
			ptrans->box.x + ptrans->box.width);

	pipe_resource_reference(&ptrans->resource, NULL);
	slab_free(&ctx->transfer_pool, ptrans);
}

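/* Overview of the paths through transfer_map below, in roughly the
 * order they are tried:
 *
 *  1. tiled resource: map via a linear staging resource (with an
 *     untiling blit first if the caller wants to read)
 *  2. DISCARD_WHOLE_RESOURCE: orphan the current bo (realloc + rebind
 *     state) instead of synchronizing
 *  3. write to a not-yet-valid range of a PIPE_BUFFER: no sync needed
 *  4. busy resource, DISCARD_RANGE write with reordering enabled: try
 *     shadowing when it would avoid a flush, otherwise flush and use
 *     a staging resource
 *  5. otherwise: flush/stall as required and map the bo directly
 */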
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
		box->width, box->height, box->x, box->y);

	ptrans = slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* slab_alloc() doesn't zero: */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	/* we always need a staging texture for tiled buffers:
	 *
	 * TODO we might sometimes want to *also* shadow the resource to avoid
	 * splitting a batch.. for ex, mid-frame texture uploads to a tiled
	 * texture.
	 */
	if (rsc->tile_mode) {
		struct fd_resource *staging_rsc;

		staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
		if (staging_rsc) {
			// TODO for PIPE_TRANSFER_READ, need to do untiling blit..
			trans->staging_prsc = &staging_rsc->base;
			trans->base.stride = util_format_get_nblocksx(format,
					staging_rsc->slices[0].pitch) * staging_rsc->cpp;
			trans->base.layer_stride = staging_rsc->layer_first ?
					staging_rsc->layer_size : staging_rsc->slices[0].size0;
			trans->staging_box = *box;
			trans->staging_box.x = 0;
			trans->staging_box.y = 0;
			trans->staging_box.z = 0;

			if (usage & PIPE_TRANSFER_READ) {
				fd_blit_to_staging(ctx, trans);
				fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_READ);
			}

			buf = fd_bo_map(staging_rsc->bo);
			offset = 0;

			*pptrans = ptrans;

			ctx->stats.staging_uploads++;

			return buf;
		}
	}

	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		rebind_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			prsc->target == PIPE_BUFFER &&
			!util_ranges_intersect(&rsc->valid_buffer_range,
					box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range. No need
		 * to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		struct fd_batch *write_batch = NULL;

		/* hold a reference, so it doesn't disappear under us: */
		fd_batch_reference(&write_batch, rsc->write_batch);

		if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
				write_batch->back_blit) {
			/* if only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could go down this path !reorder && !busy_for_read
		 * ie. we only *don't* want to go down this path if the blit
		 * will trigger a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ) &&
				(usage & PIPE_TRANSFER_DISCARD_RANGE)) {
			/* try shadowing only if it avoids a flush, otherwise staging would
			 * be better:
			 */
			if (needs_flush && fd_try_shadow_resource(ctx, rsc, level, box)) {
				needs_flush = busy = false;
				rebind_resource(ctx, prsc);
				ctx->stats.shadow_uploads++;
			} else {
				struct fd_resource *staging_rsc;

				if (needs_flush) {
					flush_resource(ctx, rsc, usage);
					needs_flush = false;
				}

				/* in this case, we don't need to shadow the whole resource,
				 * since any draw that references the previous contents has
				 * already had rendering flushed for all tiles. So we can
				 * use a staging buffer to do the upload.
				 */
				staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
				if (staging_rsc) {
					trans->staging_prsc = &staging_rsc->base;
					trans->base.stride = util_format_get_nblocksx(format,
							staging_rsc->slices[0].pitch) * staging_rsc->cpp;
					trans->base.layer_stride = staging_rsc->layer_first ?
							staging_rsc->layer_size : staging_rsc->slices[0].size0;
					trans->staging_box = *box;
					trans->staging_box.x = 0;
					trans->staging_box.y = 0;
					trans->staging_box.z = 0;
					buf = fd_bo_map(staging_rsc->bo);
					offset = 0;

					*pptrans = ptrans;

					fd_batch_reference(&write_batch, NULL);

					ctx->stats.staging_uploads++;

					return buf;
				}
			}
		}

		if (needs_flush) {
			flush_resource(ctx, rsc, usage);
			needs_flush = false;
		}

		fd_batch_reference(&write_batch, NULL);

		/* The kernel keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
			if (ret)
				goto fail;
		}
	}

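	/* Direct map: compute the offset of the requested box within the bo.
	 * A worked example with made-up numbers: for a linear RGBA8 level
	 * (cpp=4, 1x1 blocks) with pitch=64 pixels, stride is 64*4=256, so
	 * mapping a box at (8, 16) in layer 0 gives
	 *
	 *   offset = slice->offset + 16*256 + 8*4
	 *
	 * For compressed formats, x/y are first converted to block units by
	 * the blockwidth/blockheight divides.
	 */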
	buf = fd_bo_map(rsc->bo);
	offset = slice->offset +
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_layer_offset(rsc, slice, box->z);

	if (usage & PIPE_TRANSFER_WRITE)
		rsc->valid = true;

	*pptrans = ptrans;

	return buf + offset;

fail:
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}

static void
fd_resource_destroy(struct pipe_screen *pscreen,
		struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	fd_bc_invalidate_resource(rsc, true);
	if (rsc->bo)
		fd_bo_del(rsc->bo);
	util_range_destroy(&rsc->valid_buffer_range);
	FREE(rsc);
}

static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
		struct pipe_context *pctx,
		struct pipe_resource *prsc,
		struct winsys_handle *handle,
		unsigned usage)
{
	struct fd_resource *rsc = fd_resource(prsc);

	return fd_screen_bo_get_handle(pscreen, rsc->bo,
			rsc->slices[0].pitch * rsc->cpp, handle);
}

static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
	struct pipe_resource *prsc = &rsc->base;
	struct fd_screen *screen = fd_screen(prsc->screen);
	enum util_format_layout layout = util_format_description(format)->layout;
	uint32_t pitchalign = screen->gmem_alignw;
	uint32_t level, size = 0;
	uint32_t width = prsc->width0;
	uint32_t height = prsc->height0;
	uint32_t depth = prsc->depth0;
	/* in layer_first layout, the level (slice) contains just one
	 * layer (since in fact the layer contains the slices)
	 */
	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;

	for (level = 0; level <= prsc->last_level; level++) {
		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
		uint32_t blocks;

		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
			slice->pitch = width =
				util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
		else
			slice->pitch = width = align(width, pitchalign);
		slice->offset = size;
		blocks = util_format_get_nblocks(format, width, height);
		/* 1d array and 2d array textures must all have the same layer size
		 * for each miplevel on a3xx. 3d textures can have different layer
		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
		 * different than what this code does), so once the layer size
		 * drops to 0xf000 bytes or less, we stop reducing it.
		 */
		if (prsc->target == PIPE_TEXTURE_3D && (
				level == 1 ||
				(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else if (level == 0 || rsc->layer_first || alignment == 1)
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else
			slice->size0 = rsc->slices[level - 1].size0;

		size += slice->size0 * depth * layers_in_level;

		width = u_minify(width, 1);
		height = u_minify(height, 1);
		depth = u_minify(depth, 1);
	}

	return size;
}

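/* A worked example of the loop above, with made-up numbers (assuming
 * gmem_alignw=32, a mipmapped 100x100 PIPE_TEXTURE_2D in RGBA8, so
 * cpp=4, alignment=1, layer_first=false):
 *
 *   level 0: pitch = align(100, 32) = 128, size0 = 128*100*4 = 51200, offset = 0
 *   level 1: pitch = align(50, 32) = 64, size0 = 64*50*4 = 12800, offset = 51200
 *   level 2: pitch = align(25, 32) = 32, size0 = 32*25*4 = 3200, offset = 64000
 *   ...
 *
 * With alignment!=1 and layer_first=false, the final else branch would
 * instead carry the previous level's size0 forward, which is what keeps
 * array layers uniformly strided on a3xx.
 */
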
static uint32_t
slice_alignment(enum pipe_texture_target target)
{
	/* on a3xx, 2d array and 3d textures seem to want their
	 * layers aligned to page boundaries:
	 */
	switch (target) {
	case PIPE_TEXTURE_3D:
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		return 4096;
	default:
		return 1;
	}
}

/* cross generation texture layout to plug in to screen->setup_slices()..
 * replace with generation specific one as-needed.
 *
 * TODO for a4xx probably can extract out the a4xx specific logic into
 * a small fd4_setup_slices() wrapper that sets up layer_first, and then
 * calls this.
 */
uint32_t
fd_setup_slices(struct fd_resource *rsc)
{
	uint32_t alignment;

	alignment = slice_alignment(rsc->base.target);

	struct fd_screen *screen = fd_screen(rsc->base.screen);
	if (is_a4xx(screen)) {
		switch (rsc->base.target) {
		case PIPE_TEXTURE_3D:
			rsc->layer_first = false;
			break;
		default:
			rsc->layer_first = true;
			alignment = 1;
			break;
		}
	}

	return setup_slices(rsc, alignment, rsc->base.format);
}

/* special case to resize query buf after allocated.. */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
	struct fd_resource *rsc = fd_resource(prsc);

	debug_assert(prsc->width0 == 0);
	debug_assert(prsc->target == PIPE_BUFFER);
	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

	prsc->width0 = sz;
	realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc));
}

// TODO common helper?
static bool
has_depth(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_Z16_UNORM:
	case PIPE_FORMAT_Z32_UNORM:
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
		return true;
	default:
		return false;
	}
}

/**
 * Create a new texture object, using the given template info.
 */
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl)
{
	struct fd_screen *screen = fd_screen(pscreen);
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct pipe_resource *prsc = &rsc->base;
	enum pipe_format format = tmpl->format;
	uint32_t size;

	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
			tmpl->target, util_format_name(format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

#define LINEAR \
	(PIPE_BIND_SCANOUT | \
	 PIPE_BIND_LINEAR | \
	 PIPE_BIND_DISPLAY_TARGET)

	if (screen->tile_mode &&
			(tmpl->target != PIPE_BUFFER) &&
			(tmpl->bind & PIPE_BIND_SAMPLER_VIEW) &&
			!(tmpl->bind & LINEAR)) {
		rsc->tile_mode = screen->tile_mode(tmpl);
	}

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->internal_format = format;
	rsc->cpp = util_format_get_blocksize(format);

	assert(rsc->cpp);

	// XXX probably need some extra work if we hit rsc shadowing path w/ lrz..
	if (is_a5xx(screen) && (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) {
		const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
				DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
		unsigned lrz_pitch = align(DIV_ROUND_UP(tmpl->width0, 8), 32);
		unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8);
		unsigned size = lrz_pitch * lrz_height * 2;

		size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */

		rsc->lrz_height = lrz_height;
		rsc->lrz_width = lrz_pitch;
		rsc->lrz_pitch = lrz_pitch;
		rsc->lrz = fd_bo_new(screen->dev, size, flags);
	}

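	/* LRZ sizing worked through (with made-up numbers): for a 1920x1080
	 * depth buffer, lrz_pitch = align(ceil(1920/8), 32) = 240 and
	 * lrz_height = ceil(1080/8) = 135, i.e. one 16-bit LRZ sample per
	 * 8x8 tile of the depth buffer, plus one page (0x1000) for the
	 * fast-clear buffer.
	 */
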
	size = screen->setup_slices(rsc);

	/* special case for hw-query buffer, which we need to allocate before we
	 * know the size:
	 */
	if (size == 0) {
		/* note, semi-intentional == instead of & */
		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
		return prsc;
	}

	if (rsc->layer_first) {
		rsc->layer_size = align(size, 4096);
		size = rsc->layer_size * prsc->array_size;
	}

	realloc_bo(rsc, size);
	if (!rsc->bo)
		goto fail;

	return prsc;
fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * Create a texture from a winsys_handle. The handle is often created in
 * another process by first creating a pipe texture and then calling
 * resource_get_handle.
 */
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		struct winsys_handle *handle, unsigned usage)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct fd_resource_slice *slice = &rsc->slices[0];
	struct pipe_resource *prsc = &rsc->base;
	uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;

	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
			tmpl->target, util_format_name(tmpl->format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
	if (!rsc->bo)
		goto fail;

	rsc->internal_format = tmpl->format;
	rsc->cpp = util_format_get_blocksize(tmpl->format);
	slice->pitch = handle->stride / rsc->cpp;
	slice->offset = handle->offset;
	slice->size0 = handle->stride * prsc->height0;

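	/* Sanity-check the imported stride: the pitch in pixels must cover
	 * at least the gmem-aligned width and itself be a multiple of the
	 * alignment (presumably since gmem rendering can't cope with less).
	 * E.g. with gmem_alignw=32 and cpp=4, a 100-pixel-wide import needs
	 * handle->stride >= align(100, 32) * 4 = 512 bytes.
	 */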
	if ((slice->pitch < align(prsc->width0, pitchalign)) ||
			(slice->pitch & (pitchalign - 1)))
		goto fail;

	assert(rsc->cpp);

	return prsc;

fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * _copy_region using pipe (3d engine)
 */
static bool
fd_blitter_pipe_copy_region(struct fd_context *ctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	/* not until we allow rendertargets to be buffers */
	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
		return false;

	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
		return false;

	/* TODO we could discard if dst box covers dst level fully.. */
	fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT);
	util_blitter_copy_texture(ctx->blitter,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
	fd_blitter_pipe_end(ctx);

	return true;
}

/**
 * Copy a block of pixels from one resource to another.
 * The resources must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* try blit on 3d pipe: */
	if (fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box))
		return;

	/* else fallback to pure sw: */
	util_resource_copy_region(pctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
}

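/* Conditional rendering: a query result decides whether draws take
 * effect. cond_cond inverts the sense ("render only when the result is
 * zero"), and in the NO_WAIT modes we render anyway if the result is
 * not available yet rather than stalling on the query.
 */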
bool
fd_render_condition_check(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	if (!ctx->cond_query)
		return true;

	union pipe_query_result res = { 0 };
	bool wait =
		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
		return (bool)res.u64 != ctx->cond_cond;

	return true;
}

/**
 * Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
static void
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
	struct fd_context *ctx = fd_context(pctx);
	struct pipe_blit_info info = *blit_info;
	bool discard = false;

	if (info.src.resource->nr_samples > 1 &&
			info.dst.resource->nr_samples <= 1 &&
			!util_format_is_depth_or_stencil(info.src.resource->format) &&
			!util_format_is_pure_integer(info.src.resource->format)) {
		DBG("color resolve unimplemented");
		return;
	}

	if (info.render_condition_enable && !fd_render_condition_check(pctx))
		return;

	if (!info.scissor_enable && !info.alpha_blend) {
		discard = util_texrange_covers_whole_level(info.dst.resource,
				info.dst.level, info.dst.box.x, info.dst.box.y,
				info.dst.box.z, info.dst.box.width,
				info.dst.box.height, info.dst.box.depth);
	}

	if (util_try_blit_via_copy_region(pctx, &info)) {
		return; /* done */
	}

	if (info.mask & PIPE_MASK_S) {
		DBG("cannot blit stencil, skipping");
		info.mask &= ~PIPE_MASK_S;
	}

	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
		DBG("blit unsupported %s -> %s",
				util_format_short_name(info.src.resource->format),
				util_format_short_name(info.dst.resource->format));
		return;
	}

	fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT);
	ctx->blit(ctx, &info);
	fd_blitter_pipe_end(ctx);
}

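/* u_blitter does its blits by rendering with its own shaders and
 * state, so everything it might clobber has to be saved up front;
 * each util_blitter_save_*() below corresponds to a piece of state
 * that u_blitter will restore once the blit operation completes.
 */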
void
fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
		enum fd_render_stage stage)
{
	util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
			ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
			ctx->streamout.targets);
	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
	util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
	util_blitter_save_blend(ctx->blitter, ctx->blend);
	util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
	util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
	util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
	util_blitter_save_framebuffer(ctx->blitter,
			ctx->batch ? &ctx->batch->framebuffer : NULL);
	util_blitter_save_fragment_sampler_states(ctx->blitter,
			ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers,
			(void **)ctx->tex[PIPE_SHADER_FRAGMENT].samplers);
	util_blitter_save_fragment_sampler_views(ctx->blitter,
			ctx->tex[PIPE_SHADER_FRAGMENT].num_textures,
			ctx->tex[PIPE_SHADER_FRAGMENT].textures);
	if (!render_cond)
		util_blitter_save_render_condition(ctx->blitter,
			ctx->cond_query, ctx->cond_cond, ctx->cond_mode);

	if (ctx->batch)
		fd_batch_set_stage(ctx->batch, stage);

	ctx->in_blit = discard;
}

void
fd_blitter_pipe_end(struct fd_context *ctx)
{
	if (ctx->batch)
		fd_batch_set_stage(ctx->batch, FD_STAGE_NULL);
	ctx->in_blit = false;
}

static void
fd_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);

	/*
	 * TODO I guess we could track that the resource is invalidated and
	 * use that as a hint to realloc rather than stall in _transfer_map(),
	 * even in the non-DISCARD_WHOLE_RESOURCE case?
	 */

	if (rsc->write_batch) {
		struct fd_batch *batch = rsc->write_batch;
		struct pipe_framebuffer_state *pfb = &batch->framebuffer;

		if (pfb->zsbuf && pfb->zsbuf->texture == prsc)
			batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);

		for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
			if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
				batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
			}
		}
	}

	rsc->valid = false;
}

static enum pipe_format
fd_resource_get_internal_format(struct pipe_resource *prsc)
{
	return fd_resource(prsc)->internal_format;
}

static void
fd_resource_set_stencil(struct pipe_resource *prsc,
		struct pipe_resource *stencil)
{
	fd_resource(prsc)->stencil = fd_resource(stencil);
}

static struct pipe_resource *
fd_resource_get_stencil(struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	if (rsc->stencil)
		return &rsc->stencil->base;
	return NULL;
}

static const struct u_transfer_vtbl transfer_vtbl = {
	.resource_create = fd_resource_create,
	.resource_destroy = fd_resource_destroy,
	.transfer_map = fd_resource_transfer_map,
	.transfer_flush_region = fd_resource_transfer_flush_region,
	.transfer_unmap = fd_resource_transfer_unmap,
	.get_internal_format = fd_resource_get_internal_format,
	.set_stencil = fd_resource_set_stencil,
	.get_stencil = fd_resource_get_stencil,
};

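/* u_transfer_helper wraps the vtbl above to handle what the hw can't
 * do natively: splitting Z32_FLOAT_S8X24 into separate depth and
 * stencil resources (hence the set_stencil/get_stencil hooks), and
 * faking RGTC on pre-a4xx parts by storing an uncompressed internal
 * format (hence get_internal_format, which reports the format the
 * state tracker asked for).
 */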
void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
	struct fd_screen *screen = fd_screen(pscreen);
	bool fake_rgtc = screen->gpu_id < 400;

	pscreen->resource_create = u_transfer_helper_resource_create;
	pscreen->resource_from_handle = fd_resource_from_handle;
	pscreen->resource_get_handle = fd_resource_get_handle;
	pscreen->resource_destroy = u_transfer_helper_resource_destroy;

	pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
			true, fake_rgtc, true);

	if (!screen->setup_slices)
		screen->setup_slices = fd_setup_slices;
}

void
fd_resource_context_init(struct pipe_context *pctx)
{
	pctx->transfer_map = u_transfer_helper_transfer_map;
	pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
	pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
	pctx->buffer_subdata = u_default_buffer_subdata;
	pctx->texture_subdata = u_default_texture_subdata;
	pctx->create_surface = fd_create_surface;
	pctx->surface_destroy = fd_surface_destroy;
	pctx->resource_copy_region = fd_resource_copy_region;
	pctx->blit = fd_blit;
	pctx->flush_resource = fd_flush_resource;
	pctx->invalidate_resource = fd_invalidate_resource;
}