1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ 2 3 /* 4 * Copyright (C) 2012 Rob Clark <robclark (at) freedesktop.org> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 * 25 * Authors: 26 * Rob Clark <robclark (at) freedesktop.org> 27 */ 28 29 #include "util/u_format.h" 30 #include "util/u_format_rgtc.h" 31 #include "util/u_format_zs.h" 32 #include "util/u_inlines.h" 33 #include "util/u_transfer.h" 34 #include "util/u_string.h" 35 #include "util/u_surface.h" 36 #include "util/set.h" 37 38 #include "freedreno_resource.h" 39 #include "freedreno_batch_cache.h" 40 #include "freedreno_screen.h" 41 #include "freedreno_surface.h" 42 #include "freedreno_context.h" 43 #include "freedreno_query_hw.h" 44 #include "freedreno_util.h" 45 46 #include <errno.h> 47 48 /* XXX this should go away, needed for 'struct winsys_handle' */ 49 #include "state_tracker/drm_driver.h" 50 51 static void 52 fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc) 53 { 54 int i; 55 56 /* Go through the entire state and see if the resource is bound 57 * anywhere. If it is, mark the relevant state as dirty. This is called on 58 * realloc_bo. 59 */ 60 61 /* Constbufs */ 62 for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) { 63 if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc) 64 ctx->dirty |= FD_DIRTY_CONSTBUF; 65 if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc) 66 ctx->dirty |= FD_DIRTY_CONSTBUF; 67 } 68 69 /* VBOs */ 70 for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) { 71 if (ctx->vtx.vertexbuf.vb[i].buffer == prsc) 72 ctx->dirty |= FD_DIRTY_VTXBUF; 73 } 74 75 /* Index buffer */ 76 if (ctx->indexbuf.buffer == prsc) 77 ctx->dirty |= FD_DIRTY_INDEXBUF; 78 79 /* Textures */ 80 for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) { 81 if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc)) 82 ctx->dirty |= FD_DIRTY_VERTTEX; 83 } 84 for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) { 85 if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc)) 86 ctx->dirty |= FD_DIRTY_FRAGTEX; 87 } 88 } 89 90 static void 91 realloc_bo(struct fd_resource *rsc, uint32_t size) 92 { 93 struct fd_screen *screen = fd_screen(rsc->base.b.screen); 94 uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE | 95 DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */ 96 97 /* if we start using things other than write-combine, 98 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT 99 */ 100 101 if (rsc->bo) 102 fd_bo_del(rsc->bo); 103 104 rsc->bo = fd_bo_new(screen->dev, size, flags); 105 rsc->timestamp = 0; 106 util_range_set_empty(&rsc->valid_buffer_range); 107 fd_bc_invalidate_resource(rsc, true); 108 } 109 110 static void 111 do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback) 112 { 113 /* TODO size threshold too?? */ 114 if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) { 115 /* do blit on gpu: */ 116 fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT); 117 util_blitter_blit(ctx->blitter, blit); 118 fd_blitter_pipe_end(ctx); 119 } else { 120 /* do blit on cpu: */ 121 util_resource_copy_region(&ctx->base, 122 blit->dst.resource, blit->dst.level, blit->dst.box.x, 123 blit->dst.box.y, blit->dst.box.z, 124 blit->src.resource, blit->src.level, &blit->src.box); 125 } 126 } 127 128 static bool 129 fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc, 130 unsigned level, unsigned usage, const struct pipe_box *box) 131 { 132 struct pipe_context *pctx = &ctx->base; 133 struct pipe_resource *prsc = &rsc->base.b; 134 bool fallback = false; 135 136 if (prsc->next) 137 return false; 138 139 /* TODO: somehow munge dimensions and format to copy unsupported 140 * render target format to something that is supported? 141 */ 142 if (!pctx->screen->is_format_supported(pctx->screen, 143 prsc->format, prsc->target, prsc->nr_samples, 144 PIPE_BIND_RENDER_TARGET)) 145 fallback = true; 146 147 /* these cases should be handled elsewhere.. just for future 148 * reference in case this gets split into a more generic(ish) 149 * helper. 150 */ 151 debug_assert(!(usage & PIPE_TRANSFER_READ)); 152 debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)); 153 154 /* if we do a gpu blit to clone the whole resource, we'll just 155 * end up stalling on that.. so only allow if we can discard 156 * current range (and blit, possibly cpu or gpu, the rest) 157 */ 158 if (!(usage & PIPE_TRANSFER_DISCARD_RANGE)) 159 return false; 160 161 bool whole_level = util_texrange_covers_whole_level(prsc, level, 162 box->x, box->y, box->z, box->width, box->height, box->depth); 163 164 /* TODO need to be more clever about current level */ 165 if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level) 166 return false; 167 168 struct pipe_resource *pshadow = 169 pctx->screen->resource_create(pctx->screen, prsc); 170 171 if (!pshadow) 172 return false; 173 174 assert(!ctx->in_shadow); 175 ctx->in_shadow = true; 176 177 /* get rid of any references that batch-cache might have to us (which 178 * should empty/destroy rsc->batches hashset) 179 */ 180 fd_bc_invalidate_resource(rsc, false); 181 182 pipe_mutex_lock(ctx->screen->lock); 183 184 /* Swap the backing bo's, so shadow becomes the old buffer, 185 * blit from shadow to new buffer. From here on out, we 186 * cannot fail. 187 * 188 * Note that we need to do it in this order, otherwise if 189 * we go down cpu blit path, the recursive transfer_map() 190 * sees the wrong status.. 191 */ 192 struct fd_resource *shadow = fd_resource(pshadow); 193 194 DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count, 195 shadow, shadow->base.b.reference.count); 196 197 /* TODO valid_buffer_range?? */ 198 swap(rsc->bo, shadow->bo); 199 swap(rsc->timestamp, shadow->timestamp); 200 swap(rsc->write_batch, shadow->write_batch); 201 202 /* at this point, the newly created shadow buffer is not referenced 203 * by any batches, but the existing rsc (probably) is. We need to 204 * transfer those references over: 205 */ 206 debug_assert(shadow->batch_mask == 0); 207 struct fd_batch *batch; 208 foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) { 209 struct set_entry *entry = _mesa_set_search(batch->resources, rsc); 210 _mesa_set_remove(batch->resources, entry); 211 _mesa_set_add(batch->resources, shadow); 212 } 213 swap(rsc->batch_mask, shadow->batch_mask); 214 215 pipe_mutex_unlock(ctx->screen->lock); 216 217 struct pipe_blit_info blit = {0}; 218 blit.dst.resource = prsc; 219 blit.dst.format = prsc->format; 220 blit.src.resource = pshadow; 221 blit.src.format = pshadow->format; 222 blit.mask = util_format_get_mask(prsc->format); 223 blit.filter = PIPE_TEX_FILTER_NEAREST; 224 225 #define set_box(field, val) do { \ 226 blit.dst.field = (val); \ 227 blit.src.field = (val); \ 228 } while (0) 229 230 /* blit the other levels in their entirety: */ 231 for (unsigned l = 0; l <= prsc->last_level; l++) { 232 if (l == level) 233 continue; 234 235 /* just blit whole level: */ 236 set_box(level, l); 237 set_box(box.width, u_minify(prsc->width0, l)); 238 set_box(box.height, u_minify(prsc->height0, l)); 239 set_box(box.depth, u_minify(prsc->depth0, l)); 240 241 do_blit(ctx, &blit, fallback); 242 } 243 244 /* deal w/ current level specially, since we might need to split 245 * it up into a couple blits: 246 */ 247 if (!whole_level) { 248 set_box(level, level); 249 250 switch (prsc->target) { 251 case PIPE_BUFFER: 252 case PIPE_TEXTURE_1D: 253 set_box(box.y, 0); 254 set_box(box.z, 0); 255 set_box(box.height, 1); 256 set_box(box.depth, 1); 257 258 if (box->x > 0) { 259 set_box(box.x, 0); 260 set_box(box.width, box->x); 261 262 do_blit(ctx, &blit, fallback); 263 } 264 if ((box->x + box->width) < u_minify(prsc->width0, level)) { 265 set_box(box.x, box->x + box->width); 266 set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width)); 267 268 do_blit(ctx, &blit, fallback); 269 } 270 break; 271 case PIPE_TEXTURE_2D: 272 /* TODO */ 273 default: 274 unreachable("TODO"); 275 } 276 } 277 278 ctx->in_shadow = false; 279 280 pipe_resource_reference(&pshadow, NULL); 281 282 return true; 283 } 284 285 static unsigned 286 fd_resource_layer_offset(struct fd_resource *rsc, 287 struct fd_resource_slice *slice, 288 unsigned layer) 289 { 290 if (rsc->layer_first) 291 return layer * rsc->layer_size; 292 else 293 return layer * slice->size0; 294 } 295 296 static void 297 fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box) 298 { 299 struct fd_resource *rsc = fd_resource(trans->base.resource); 300 struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); 301 struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level); 302 enum pipe_format format = trans->base.resource->format; 303 304 float *depth = fd_bo_map(rsc->bo) + slice->offset + 305 fd_resource_layer_offset(rsc, slice, trans->base.box.z) + 306 (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4; 307 uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset + 308 fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) + 309 (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x; 310 311 if (format != PIPE_FORMAT_X32_S8X24_UINT) 312 util_format_z32_float_s8x24_uint_unpack_z_float( 313 depth, slice->pitch * 4, 314 trans->staging, trans->base.stride, 315 box->width, box->height); 316 317 util_format_z32_float_s8x24_uint_unpack_s_8uint( 318 stencil, sslice->pitch, 319 trans->staging, trans->base.stride, 320 box->width, box->height); 321 } 322 323 static void 324 fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box) 325 { 326 struct fd_resource *rsc = fd_resource(trans->base.resource); 327 struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); 328 enum pipe_format format = trans->base.resource->format; 329 330 uint8_t *data = fd_bo_map(rsc->bo) + slice->offset + 331 fd_resource_layer_offset(rsc, slice, trans->base.box.z) + 332 ((trans->base.box.y + box->y) * slice->pitch + 333 trans->base.box.x + box->x) * rsc->cpp; 334 335 uint8_t *source = trans->staging + 336 util_format_get_nblocksy(format, box->y) * trans->base.stride + 337 util_format_get_stride(format, box->x); 338 339 switch (format) { 340 case PIPE_FORMAT_RGTC1_UNORM: 341 case PIPE_FORMAT_RGTC1_SNORM: 342 case PIPE_FORMAT_LATC1_UNORM: 343 case PIPE_FORMAT_LATC1_SNORM: 344 util_format_rgtc1_unorm_unpack_rgba_8unorm( 345 data, slice->pitch * rsc->cpp, 346 source, trans->base.stride, 347 box->width, box->height); 348 break; 349 case PIPE_FORMAT_RGTC2_UNORM: 350 case PIPE_FORMAT_RGTC2_SNORM: 351 case PIPE_FORMAT_LATC2_UNORM: 352 case PIPE_FORMAT_LATC2_SNORM: 353 util_format_rgtc2_unorm_unpack_rgba_8unorm( 354 data, slice->pitch * rsc->cpp, 355 source, trans->base.stride, 356 box->width, box->height); 357 break; 358 default: 359 assert(!"Unexpected format\n"); 360 break; 361 } 362 } 363 364 static void 365 fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) 366 { 367 enum pipe_format format = trans->base.resource->format; 368 369 switch (format) { 370 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 371 case PIPE_FORMAT_X32_S8X24_UINT: 372 fd_resource_flush_z32s8(trans, box); 373 break; 374 case PIPE_FORMAT_RGTC1_UNORM: 375 case PIPE_FORMAT_RGTC1_SNORM: 376 case PIPE_FORMAT_RGTC2_UNORM: 377 case PIPE_FORMAT_RGTC2_SNORM: 378 case PIPE_FORMAT_LATC1_UNORM: 379 case PIPE_FORMAT_LATC1_SNORM: 380 case PIPE_FORMAT_LATC2_UNORM: 381 case PIPE_FORMAT_LATC2_SNORM: 382 fd_resource_flush_rgtc(trans, box); 383 break; 384 default: 385 assert(!"Unexpected staging transfer type"); 386 break; 387 } 388 } 389 390 static void fd_resource_transfer_flush_region(struct pipe_context *pctx, 391 struct pipe_transfer *ptrans, 392 const struct pipe_box *box) 393 { 394 struct fd_resource *rsc = fd_resource(ptrans->resource); 395 struct fd_transfer *trans = fd_transfer(ptrans); 396 397 if (ptrans->resource->target == PIPE_BUFFER) 398 util_range_add(&rsc->valid_buffer_range, 399 ptrans->box.x + box->x, 400 ptrans->box.x + box->x + box->width); 401 402 if (trans->staging) 403 fd_resource_flush(trans, box); 404 } 405 406 static void 407 fd_resource_transfer_unmap(struct pipe_context *pctx, 408 struct pipe_transfer *ptrans) 409 { 410 struct fd_context *ctx = fd_context(pctx); 411 struct fd_resource *rsc = fd_resource(ptrans->resource); 412 struct fd_transfer *trans = fd_transfer(ptrans); 413 414 if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { 415 struct pipe_box box; 416 u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box); 417 fd_resource_flush(trans, &box); 418 } 419 420 if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { 421 fd_bo_cpu_fini(rsc->bo); 422 if (rsc->stencil) 423 fd_bo_cpu_fini(rsc->stencil->bo); 424 } 425 426 util_range_add(&rsc->valid_buffer_range, 427 ptrans->box.x, 428 ptrans->box.x + ptrans->box.width); 429 430 pipe_resource_reference(&ptrans->resource, NULL); 431 slab_free(&ctx->transfer_pool, ptrans); 432 433 free(trans->staging); 434 } 435 436 static void * 437 fd_resource_transfer_map(struct pipe_context *pctx, 438 struct pipe_resource *prsc, 439 unsigned level, unsigned usage, 440 const struct pipe_box *box, 441 struct pipe_transfer **pptrans) 442 { 443 struct fd_context *ctx = fd_context(pctx); 444 struct fd_resource *rsc = fd_resource(prsc); 445 struct fd_resource_slice *slice = fd_resource_slice(rsc, level); 446 struct fd_transfer *trans; 447 struct pipe_transfer *ptrans; 448 enum pipe_format format = prsc->format; 449 uint32_t op = 0; 450 uint32_t offset; 451 char *buf; 452 int ret = 0; 453 454 DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage, 455 box->width, box->height, box->x, box->y); 456 457 ptrans = slab_alloc(&ctx->transfer_pool); 458 if (!ptrans) 459 return NULL; 460 461 /* slab_alloc_st() doesn't zero: */ 462 trans = fd_transfer(ptrans); 463 memset(trans, 0, sizeof(*trans)); 464 465 pipe_resource_reference(&ptrans->resource, prsc); 466 ptrans->level = level; 467 ptrans->usage = usage; 468 ptrans->box = *box; 469 ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp; 470 ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0; 471 472 if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ)) 473 usage |= PIPE_TRANSFER_UNSYNCHRONIZED; 474 475 if (usage & PIPE_TRANSFER_READ) 476 op |= DRM_FREEDRENO_PREP_READ; 477 478 if (usage & PIPE_TRANSFER_WRITE) 479 op |= DRM_FREEDRENO_PREP_WRITE; 480 481 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { 482 realloc_bo(rsc, fd_bo_size(rsc->bo)); 483 if (rsc->stencil) 484 realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo)); 485 fd_invalidate_resource(ctx, prsc); 486 } else if ((usage & PIPE_TRANSFER_WRITE) && 487 prsc->target == PIPE_BUFFER && 488 !util_ranges_intersect(&rsc->valid_buffer_range, 489 box->x, box->x + box->width)) { 490 /* We are trying to write to a previously uninitialized range. No need 491 * to wait. 492 */ 493 } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { 494 struct fd_batch *write_batch = NULL; 495 496 /* hold a reference, so it doesn't disappear under us: */ 497 fd_batch_reference(&write_batch, rsc->write_batch); 498 499 if ((usage & PIPE_TRANSFER_WRITE) && write_batch && 500 write_batch->back_blit) { 501 /* if only thing pending is a back-blit, we can discard it: */ 502 fd_batch_reset(write_batch); 503 } 504 505 /* If the GPU is writing to the resource, or if it is reading from the 506 * resource and we're trying to write to it, flush the renders. 507 */ 508 bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE)); 509 bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo, 510 ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC)); 511 512 /* if we need to flush/stall, see if we can make a shadow buffer 513 * to avoid this: 514 * 515 * TODO we could go down this path !reorder && !busy_for_read 516 * ie. we only *don't* want to go down this path if the blit 517 * will trigger a flush! 518 */ 519 if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) { 520 if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) { 521 needs_flush = busy = false; 522 fd_invalidate_resource(ctx, prsc); 523 } 524 } 525 526 if (needs_flush) { 527 if (usage & PIPE_TRANSFER_WRITE) { 528 struct fd_batch *batch, *last_batch = NULL; 529 foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) { 530 fd_batch_reference(&last_batch, batch); 531 fd_batch_flush(batch, false); 532 } 533 if (last_batch) { 534 fd_batch_sync(last_batch); 535 fd_batch_reference(&last_batch, NULL); 536 } 537 assert(rsc->batch_mask == 0); 538 } else { 539 fd_batch_flush(write_batch, true); 540 } 541 assert(!rsc->write_batch); 542 } 543 544 fd_batch_reference(&write_batch, NULL); 545 546 /* The GPU keeps track of how the various bo's are being used, and 547 * will wait if necessary for the proper operation to have 548 * completed. 549 */ 550 if (busy) { 551 ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op); 552 if (ret) 553 goto fail; 554 } 555 } 556 557 buf = fd_bo_map(rsc->bo); 558 if (!buf) 559 goto fail; 560 561 offset = slice->offset + 562 box->y / util_format_get_blockheight(format) * ptrans->stride + 563 box->x / util_format_get_blockwidth(format) * rsc->cpp + 564 fd_resource_layer_offset(rsc, slice, box->z); 565 566 if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || 567 prsc->format == PIPE_FORMAT_X32_S8X24_UINT) { 568 assert(trans->base.box.depth == 1); 569 570 trans->base.stride = trans->base.box.width * rsc->cpp * 2; 571 trans->staging = malloc(trans->base.stride * trans->base.box.height); 572 if (!trans->staging) 573 goto fail; 574 575 /* if we're not discarding the whole range (or resource), we must copy 576 * the real data in. 577 */ 578 if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | 579 PIPE_TRANSFER_DISCARD_RANGE))) { 580 struct fd_resource_slice *sslice = 581 fd_resource_slice(rsc->stencil, level); 582 void *sbuf = fd_bo_map(rsc->stencil->bo); 583 if (!sbuf) 584 goto fail; 585 586 float *depth = (float *)(buf + slice->offset + 587 fd_resource_layer_offset(rsc, slice, box->z) + 588 box->y * slice->pitch * 4 + box->x * 4); 589 uint8_t *stencil = sbuf + sslice->offset + 590 fd_resource_layer_offset(rsc->stencil, sslice, box->z) + 591 box->y * sslice->pitch + box->x; 592 593 if (format != PIPE_FORMAT_X32_S8X24_UINT) 594 util_format_z32_float_s8x24_uint_pack_z_float( 595 trans->staging, trans->base.stride, 596 depth, slice->pitch * 4, 597 box->width, box->height); 598 599 util_format_z32_float_s8x24_uint_pack_s_8uint( 600 trans->staging, trans->base.stride, 601 stencil, sslice->pitch, 602 box->width, box->height); 603 } 604 605 buf = trans->staging; 606 offset = 0; 607 } else if (rsc->internal_format != format && 608 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) { 609 assert(trans->base.box.depth == 1); 610 611 trans->base.stride = util_format_get_stride( 612 format, trans->base.box.width); 613 trans->staging = malloc( 614 util_format_get_2d_size(format, trans->base.stride, 615 trans->base.box.height)); 616 if (!trans->staging) 617 goto fail; 618 619 /* if we're not discarding the whole range (or resource), we must copy 620 * the real data in. 621 */ 622 if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | 623 PIPE_TRANSFER_DISCARD_RANGE))) { 624 uint8_t *rgba8 = (uint8_t *)buf + slice->offset + 625 fd_resource_layer_offset(rsc, slice, box->z) + 626 box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp; 627 628 switch (format) { 629 case PIPE_FORMAT_RGTC1_UNORM: 630 case PIPE_FORMAT_RGTC1_SNORM: 631 case PIPE_FORMAT_LATC1_UNORM: 632 case PIPE_FORMAT_LATC1_SNORM: 633 util_format_rgtc1_unorm_pack_rgba_8unorm( 634 trans->staging, trans->base.stride, 635 rgba8, slice->pitch * rsc->cpp, 636 box->width, box->height); 637 break; 638 case PIPE_FORMAT_RGTC2_UNORM: 639 case PIPE_FORMAT_RGTC2_SNORM: 640 case PIPE_FORMAT_LATC2_UNORM: 641 case PIPE_FORMAT_LATC2_SNORM: 642 util_format_rgtc2_unorm_pack_rgba_8unorm( 643 trans->staging, trans->base.stride, 644 rgba8, slice->pitch * rsc->cpp, 645 box->width, box->height); 646 break; 647 default: 648 assert(!"Unexpected format"); 649 break; 650 } 651 } 652 653 buf = trans->staging; 654 offset = 0; 655 } 656 657 *pptrans = ptrans; 658 659 return buf + offset; 660 661 fail: 662 fd_resource_transfer_unmap(pctx, ptrans); 663 return NULL; 664 } 665 666 static void 667 fd_resource_destroy(struct pipe_screen *pscreen, 668 struct pipe_resource *prsc) 669 { 670 struct fd_resource *rsc = fd_resource(prsc); 671 fd_bc_invalidate_resource(rsc, true); 672 if (rsc->bo) 673 fd_bo_del(rsc->bo); 674 util_range_destroy(&rsc->valid_buffer_range); 675 FREE(rsc); 676 } 677 678 static boolean 679 fd_resource_get_handle(struct pipe_screen *pscreen, 680 struct pipe_resource *prsc, 681 struct winsys_handle *handle) 682 { 683 struct fd_resource *rsc = fd_resource(prsc); 684 685 return fd_screen_bo_get_handle(pscreen, rsc->bo, 686 rsc->slices[0].pitch * rsc->cpp, handle); 687 } 688 689 690 static const struct u_resource_vtbl fd_resource_vtbl = { 691 .resource_get_handle = fd_resource_get_handle, 692 .resource_destroy = fd_resource_destroy, 693 .transfer_map = fd_resource_transfer_map, 694 .transfer_flush_region = fd_resource_transfer_flush_region, 695 .transfer_unmap = fd_resource_transfer_unmap, 696 }; 697 698 static uint32_t 699 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) 700 { 701 struct pipe_resource *prsc = &rsc->base.b; 702 enum util_format_layout layout = util_format_description(format)->layout; 703 uint32_t pitchalign = fd_screen(prsc->screen)->gmem_alignw; 704 uint32_t level, size = 0; 705 uint32_t width = prsc->width0; 706 uint32_t height = prsc->height0; 707 uint32_t depth = prsc->depth0; 708 /* in layer_first layout, the level (slice) contains just one 709 * layer (since in fact the layer contains the slices) 710 */ 711 uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size; 712 713 for (level = 0; level <= prsc->last_level; level++) { 714 struct fd_resource_slice *slice = fd_resource_slice(rsc, level); 715 uint32_t blocks; 716 717 if (layout == UTIL_FORMAT_LAYOUT_ASTC) 718 slice->pitch = width = 719 util_align_npot(width, pitchalign * util_format_get_blockwidth(format)); 720 else 721 slice->pitch = width = align(width, pitchalign); 722 slice->offset = size; 723 blocks = util_format_get_nblocks(format, width, height); 724 /* 1d array and 2d array textures must all have the same layer size 725 * for each miplevel on a3xx. 3d textures can have different layer 726 * sizes for high levels, but the hw auto-sizer is buggy (or at least 727 * different than what this code does), so as soon as the layer size 728 * range gets into range, we stop reducing it. 729 */ 730 if (prsc->target == PIPE_TEXTURE_3D && ( 731 level == 1 || 732 (level > 1 && rsc->slices[level - 1].size0 > 0xf000))) 733 slice->size0 = align(blocks * rsc->cpp, alignment); 734 else if (level == 0 || rsc->layer_first || alignment == 1) 735 slice->size0 = align(blocks * rsc->cpp, alignment); 736 else 737 slice->size0 = rsc->slices[level - 1].size0; 738 739 size += slice->size0 * depth * layers_in_level; 740 741 width = u_minify(width, 1); 742 height = u_minify(height, 1); 743 depth = u_minify(depth, 1); 744 } 745 746 return size; 747 } 748 749 static uint32_t 750 slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl) 751 { 752 /* on a3xx, 2d array and 3d textures seem to want their 753 * layers aligned to page boundaries: 754 */ 755 switch (tmpl->target) { 756 case PIPE_TEXTURE_3D: 757 case PIPE_TEXTURE_1D_ARRAY: 758 case PIPE_TEXTURE_2D_ARRAY: 759 return 4096; 760 default: 761 return 1; 762 } 763 } 764 765 /* special case to resize query buf after allocated.. */ 766 void 767 fd_resource_resize(struct pipe_resource *prsc, uint32_t sz) 768 { 769 struct fd_resource *rsc = fd_resource(prsc); 770 771 debug_assert(prsc->width0 == 0); 772 debug_assert(prsc->target == PIPE_BUFFER); 773 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER); 774 775 prsc->width0 = sz; 776 realloc_bo(rsc, setup_slices(rsc, 1, prsc->format)); 777 } 778 779 /** 780 * Create a new texture object, using the given template info. 781 */ 782 static struct pipe_resource * 783 fd_resource_create(struct pipe_screen *pscreen, 784 const struct pipe_resource *tmpl) 785 { 786 struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); 787 struct pipe_resource *prsc = &rsc->base.b; 788 enum pipe_format format = tmpl->format; 789 uint32_t size, alignment; 790 791 DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, " 792 "nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc, 793 tmpl->target, util_format_name(format), 794 tmpl->width0, tmpl->height0, tmpl->depth0, 795 tmpl->array_size, tmpl->last_level, tmpl->nr_samples, 796 tmpl->usage, tmpl->bind, tmpl->flags); 797 798 if (!rsc) 799 return NULL; 800 801 *prsc = *tmpl; 802 803 pipe_reference_init(&prsc->reference, 1); 804 805 prsc->screen = pscreen; 806 807 util_range_init(&rsc->valid_buffer_range); 808 809 rsc->base.vtbl = &fd_resource_vtbl; 810 811 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) 812 format = PIPE_FORMAT_Z32_FLOAT; 813 else if (fd_screen(pscreen)->gpu_id < 400 && 814 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) 815 format = PIPE_FORMAT_R8G8B8A8_UNORM; 816 rsc->internal_format = format; 817 rsc->cpp = util_format_get_blocksize(format); 818 819 assert(rsc->cpp); 820 821 alignment = slice_alignment(pscreen, tmpl); 822 if (is_a4xx(fd_screen(pscreen)) || is_a5xx(fd_screen(pscreen))) { 823 switch (tmpl->target) { 824 case PIPE_TEXTURE_3D: 825 rsc->layer_first = false; 826 break; 827 default: 828 rsc->layer_first = true; 829 alignment = 1; 830 break; 831 } 832 } 833 834 size = setup_slices(rsc, alignment, format); 835 836 /* special case for hw-query buffer, which we need to allocate before we 837 * know the size: 838 */ 839 if (size == 0) { 840 /* note, semi-intention == instead of & */ 841 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER); 842 return prsc; 843 } 844 845 if (rsc->layer_first) { 846 rsc->layer_size = align(size, 4096); 847 size = rsc->layer_size * prsc->array_size; 848 } 849 850 realloc_bo(rsc, size); 851 if (!rsc->bo) 852 goto fail; 853 854 /* There is no native Z32F_S8 sampling or rendering format, so this must 855 * be emulated via two separate textures. The depth texture still keeps 856 * its Z32F_S8 format though, and we also keep a reference to a separate 857 * S8 texture. 858 */ 859 if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { 860 struct pipe_resource stencil = *tmpl; 861 stencil.format = PIPE_FORMAT_S8_UINT; 862 rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil)); 863 if (!rsc->stencil) 864 goto fail; 865 } 866 867 return prsc; 868 fail: 869 fd_resource_destroy(pscreen, prsc); 870 return NULL; 871 } 872 873 /** 874 * Create a texture from a winsys_handle. The handle is often created in 875 * another process by first creating a pipe texture and then calling 876 * resource_get_handle. 877 */ 878 static struct pipe_resource * 879 fd_resource_from_handle(struct pipe_screen *pscreen, 880 const struct pipe_resource *tmpl, 881 struct winsys_handle *handle, unsigned usage) 882 { 883 struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); 884 struct fd_resource_slice *slice = &rsc->slices[0]; 885 struct pipe_resource *prsc = &rsc->base.b; 886 uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw; 887 888 DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, " 889 "nr_samples=%u, usage=%u, bind=%x, flags=%x", 890 tmpl->target, util_format_name(tmpl->format), 891 tmpl->width0, tmpl->height0, tmpl->depth0, 892 tmpl->array_size, tmpl->last_level, tmpl->nr_samples, 893 tmpl->usage, tmpl->bind, tmpl->flags); 894 895 if (!rsc) 896 return NULL; 897 898 *prsc = *tmpl; 899 900 pipe_reference_init(&prsc->reference, 1); 901 902 prsc->screen = pscreen; 903 904 util_range_init(&rsc->valid_buffer_range); 905 906 rsc->bo = fd_screen_bo_from_handle(pscreen, handle); 907 if (!rsc->bo) 908 goto fail; 909 910 rsc->base.vtbl = &fd_resource_vtbl; 911 rsc->cpp = util_format_get_blocksize(tmpl->format); 912 slice->pitch = handle->stride / rsc->cpp; 913 slice->offset = handle->offset; 914 slice->size0 = handle->stride * prsc->height0; 915 916 if ((slice->pitch < align(prsc->width0, pitchalign)) || 917 (slice->pitch & (pitchalign - 1))) 918 goto fail; 919 920 assert(rsc->cpp); 921 922 return prsc; 923 924 fail: 925 fd_resource_destroy(pscreen, prsc); 926 return NULL; 927 } 928 929 /** 930 * _copy_region using pipe (3d engine) 931 */ 932 static bool 933 fd_blitter_pipe_copy_region(struct fd_context *ctx, 934 struct pipe_resource *dst, 935 unsigned dst_level, 936 unsigned dstx, unsigned dsty, unsigned dstz, 937 struct pipe_resource *src, 938 unsigned src_level, 939 const struct pipe_box *src_box) 940 { 941 /* not until we allow rendertargets to be buffers */ 942 if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER) 943 return false; 944 945 if (!util_blitter_is_copy_supported(ctx->blitter, dst, src)) 946 return false; 947 948 /* TODO we could discard if dst box covers dst level fully.. */ 949 fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT); 950 util_blitter_copy_texture(ctx->blitter, 951 dst, dst_level, dstx, dsty, dstz, 952 src, src_level, src_box); 953 fd_blitter_pipe_end(ctx); 954 955 return true; 956 } 957 958 /** 959 * Copy a block of pixels from one resource to another. 960 * The resource must be of the same format. 961 * Resources with nr_samples > 1 are not allowed. 962 */ 963 static void 964 fd_resource_copy_region(struct pipe_context *pctx, 965 struct pipe_resource *dst, 966 unsigned dst_level, 967 unsigned dstx, unsigned dsty, unsigned dstz, 968 struct pipe_resource *src, 969 unsigned src_level, 970 const struct pipe_box *src_box) 971 { 972 struct fd_context *ctx = fd_context(pctx); 973 974 /* TODO if we have 2d core, or other DMA engine that could be used 975 * for simple copies and reasonably easily synchronized with the 3d 976 * core, this is where we'd plug it in.. 977 */ 978 979 /* try blit on 3d pipe: */ 980 if (fd_blitter_pipe_copy_region(ctx, 981 dst, dst_level, dstx, dsty, dstz, 982 src, src_level, src_box)) 983 return; 984 985 /* else fallback to pure sw: */ 986 util_resource_copy_region(pctx, 987 dst, dst_level, dstx, dsty, dstz, 988 src, src_level, src_box); 989 } 990 991 bool 992 fd_render_condition_check(struct pipe_context *pctx) 993 { 994 struct fd_context *ctx = fd_context(pctx); 995 996 if (!ctx->cond_query) 997 return true; 998 999 union pipe_query_result res = { 0 }; 1000 bool wait = 1001 ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT && 1002 ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; 1003 1004 if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res)) 1005 return (bool)res.u64 != ctx->cond_cond; 1006 1007 return true; 1008 } 1009 1010 /** 1011 * Optimal hardware path for blitting pixels. 1012 * Scaling, format conversion, up- and downsampling (resolve) are allowed. 1013 */ 1014 static void 1015 fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) 1016 { 1017 struct fd_context *ctx = fd_context(pctx); 1018 struct pipe_blit_info info = *blit_info; 1019 bool discard = false; 1020 1021 if (info.src.resource->nr_samples > 1 && 1022 info.dst.resource->nr_samples <= 1 && 1023 !util_format_is_depth_or_stencil(info.src.resource->format) && 1024 !util_format_is_pure_integer(info.src.resource->format)) { 1025 DBG("color resolve unimplemented"); 1026 return; 1027 } 1028 1029 if (info.render_condition_enable && !fd_render_condition_check(pctx)) 1030 return; 1031 1032 if (!info.scissor_enable && !info.alpha_blend) { 1033 discard = util_texrange_covers_whole_level(info.dst.resource, 1034 info.dst.level, info.dst.box.x, info.dst.box.y, 1035 info.dst.box.z, info.dst.box.width, 1036 info.dst.box.height, info.dst.box.depth); 1037 } 1038 1039 if (util_try_blit_via_copy_region(pctx, &info)) { 1040 return; /* done */ 1041 } 1042 1043 if (info.mask & PIPE_MASK_S) { 1044 DBG("cannot blit stencil, skipping"); 1045 info.mask &= ~PIPE_MASK_S; 1046 } 1047 1048 if (!util_blitter_is_blit_supported(ctx->blitter, &info)) { 1049 DBG("blit unsupported %s -> %s", 1050 util_format_short_name(info.src.resource->format), 1051 util_format_short_name(info.dst.resource->format)); 1052 return; 1053 } 1054 1055 fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT); 1056 util_blitter_blit(ctx->blitter, &info); 1057 fd_blitter_pipe_end(ctx); 1058 } 1059 1060 void 1061 fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard, 1062 enum fd_render_stage stage) 1063 { 1064 util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, 1065 ctx->constbuf[PIPE_SHADER_FRAGMENT].cb); 1066 util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb); 1067 util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx); 1068 util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp); 1069 util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets, 1070 ctx->streamout.targets); 1071 util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer); 1072 util_blitter_save_viewport(ctx->blitter, &ctx->viewport); 1073 util_blitter_save_scissor(ctx->blitter, &ctx->scissor); 1074 util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp); 1075 util_blitter_save_blend(ctx->blitter, ctx->blend); 1076 util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa); 1077 util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref); 1078 util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask); 1079 util_blitter_save_framebuffer(ctx->blitter, 1080 ctx->batch ? &ctx->batch->framebuffer : NULL); 1081 util_blitter_save_fragment_sampler_states(ctx->blitter, 1082 ctx->fragtex.num_samplers, 1083 (void **)ctx->fragtex.samplers); 1084 util_blitter_save_fragment_sampler_views(ctx->blitter, 1085 ctx->fragtex.num_textures, ctx->fragtex.textures); 1086 if (!render_cond) 1087 util_blitter_save_render_condition(ctx->blitter, 1088 ctx->cond_query, ctx->cond_cond, ctx->cond_mode); 1089 1090 if (ctx->batch) 1091 fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, stage); 1092 1093 ctx->in_blit = discard; 1094 } 1095 1096 void 1097 fd_blitter_pipe_end(struct fd_context *ctx) 1098 { 1099 if (ctx->batch) 1100 fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL); 1101 ctx->in_blit = false; 1102 } 1103 1104 static void 1105 fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc) 1106 { 1107 struct fd_resource *rsc = fd_resource(prsc); 1108 1109 if (rsc->write_batch) 1110 fd_batch_flush(rsc->write_batch, true); 1111 1112 assert(!rsc->write_batch); 1113 } 1114 1115 void 1116 fd_resource_screen_init(struct pipe_screen *pscreen) 1117 { 1118 pscreen->resource_create = fd_resource_create; 1119 pscreen->resource_from_handle = fd_resource_from_handle; 1120 pscreen->resource_get_handle = u_resource_get_handle_vtbl; 1121 pscreen->resource_destroy = u_resource_destroy_vtbl; 1122 } 1123 1124 void 1125 fd_resource_context_init(struct pipe_context *pctx) 1126 { 1127 pctx->transfer_map = u_transfer_map_vtbl; 1128 pctx->transfer_flush_region = u_transfer_flush_region_vtbl; 1129 pctx->transfer_unmap = u_transfer_unmap_vtbl; 1130 pctx->buffer_subdata = u_default_buffer_subdata; 1131 pctx->texture_subdata = u_default_texture_subdata; 1132 pctx->create_surface = fd_create_surface; 1133 pctx->surface_destroy = fd_surface_destroy; 1134 pctx->resource_copy_region = fd_resource_copy_region; 1135 pctx->blit = fd_blit; 1136 pctx->flush_resource = fd_flush_resource; 1137 } 1138