1 /* 2 * Copyright 2014 Broadcom 3 * Copyright (C) 2012 Rob Clark <robclark (at) freedesktop.org> 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25 #include "util/u_blit.h" 26 #include "util/u_memory.h" 27 #include "util/u_format.h" 28 #include "util/u_inlines.h" 29 #include "util/u_surface.h" 30 #include "util/u_upload_mgr.h" 31 32 #include "drm_fourcc.h" 33 #include "vc4_drm.h" 34 #include "vc4_screen.h" 35 #include "vc4_context.h" 36 #include "vc4_resource.h" 37 #include "vc4_tiling.h" 38 39 #ifndef DRM_FORMAT_MOD_INVALID 40 #define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1) 41 #endif 42 43 static bool 44 vc4_resource_bo_alloc(struct vc4_resource *rsc) 45 { 46 struct pipe_resource *prsc = &rsc->base; 47 struct pipe_screen *pscreen = prsc->screen; 48 struct vc4_bo *bo; 49 50 if (vc4_debug & VC4_DEBUG_SURFACE) { 51 fprintf(stderr, "alloc %p: size %d + offset %d -> %d\n", 52 rsc, 53 rsc->slices[0].size, 54 rsc->slices[0].offset, 55 rsc->slices[0].offset + 56 rsc->slices[0].size + 57 rsc->cube_map_stride * (prsc->array_size - 1)); 58 } 59 60 bo = vc4_bo_alloc(vc4_screen(pscreen), 61 rsc->slices[0].offset + 62 rsc->slices[0].size + 63 rsc->cube_map_stride * (prsc->array_size - 1), 64 "resource"); 65 if (bo) { 66 vc4_bo_unreference(&rsc->bo); 67 rsc->bo = bo; 68 return true; 69 } else { 70 return false; 71 } 72 } 73 74 static void 75 vc4_resource_transfer_unmap(struct pipe_context *pctx, 76 struct pipe_transfer *ptrans) 77 { 78 struct vc4_context *vc4 = vc4_context(pctx); 79 struct vc4_transfer *trans = vc4_transfer(ptrans); 80 81 if (trans->map) { 82 struct vc4_resource *rsc; 83 struct vc4_resource_slice *slice; 84 if (trans->ss_resource) { 85 rsc = vc4_resource(trans->ss_resource); 86 slice = &rsc->slices[0]; 87 } else { 88 rsc = vc4_resource(ptrans->resource); 89 slice = &rsc->slices[ptrans->level]; 90 } 91 92 if (ptrans->usage & PIPE_TRANSFER_WRITE) { 93 vc4_store_tiled_image(rsc->bo->map + slice->offset + 94 ptrans->box.z * rsc->cube_map_stride, 95 slice->stride, 96 trans->map, ptrans->stride, 97 slice->tiling, rsc->cpp, 98 &ptrans->box); 99 } 100 free(trans->map); 101 } 102 103 if (trans->ss_resource && (ptrans->usage & PIPE_TRANSFER_WRITE)) { 104 struct pipe_blit_info blit; 105 memset(&blit, 0, sizeof(blit)); 106 107 blit.src.resource = trans->ss_resource; 108 blit.src.format = trans->ss_resource->format; 109 blit.src.box.width = trans->ss_box.width; 110 blit.src.box.height = trans->ss_box.height; 111 blit.src.box.depth = 1; 112 113 blit.dst.resource = ptrans->resource; 114 blit.dst.format = ptrans->resource->format; 115 blit.dst.level = ptrans->level; 116 blit.dst.box = trans->ss_box; 117 118 blit.mask = util_format_get_mask(ptrans->resource->format); 119 blit.filter = PIPE_TEX_FILTER_NEAREST; 120 121 pctx->blit(pctx, &blit); 122 123 pipe_resource_reference(&trans->ss_resource, NULL); 124 } 125 126 pipe_resource_reference(&ptrans->resource, NULL); 127 slab_free(&vc4->transfer_pool, ptrans); 128 } 129 130 static struct pipe_resource * 131 vc4_get_temp_resource(struct pipe_context *pctx, 132 struct pipe_resource *prsc, 133 const struct pipe_box *box) 134 { 135 struct pipe_resource temp_setup; 136 137 memset(&temp_setup, 0, sizeof(temp_setup)); 138 temp_setup.target = prsc->target; 139 temp_setup.format = prsc->format; 140 temp_setup.width0 = box->width; 141 temp_setup.height0 = box->height; 142 temp_setup.depth0 = 1; 143 temp_setup.array_size = 1; 144 145 return pctx->screen->resource_create(pctx->screen, &temp_setup); 146 } 147 148 static void * 149 vc4_resource_transfer_map(struct pipe_context *pctx, 150 struct pipe_resource *prsc, 151 unsigned level, unsigned usage, 152 const struct pipe_box *box, 153 struct pipe_transfer **pptrans) 154 { 155 struct vc4_context *vc4 = vc4_context(pctx); 156 struct vc4_resource *rsc = vc4_resource(prsc); 157 struct vc4_transfer *trans; 158 struct pipe_transfer *ptrans; 159 enum pipe_format format = prsc->format; 160 char *buf; 161 162 /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is 163 * being mapped. 164 */ 165 if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && 166 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && 167 !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) && 168 prsc->last_level == 0 && 169 prsc->width0 == box->width && 170 prsc->height0 == box->height && 171 prsc->depth0 == box->depth && 172 prsc->array_size == 1 && 173 rsc->bo->private) { 174 usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; 175 } 176 177 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { 178 if (vc4_resource_bo_alloc(rsc)) { 179 /* If it might be bound as one of our vertex buffers, 180 * make sure we re-emit vertex buffer state. 181 */ 182 if (prsc->bind & PIPE_BIND_VERTEX_BUFFER) 183 vc4->dirty |= VC4_DIRTY_VTXBUF; 184 } else { 185 /* If we failed to reallocate, flush users so that we 186 * don't violate any syncing requirements. 187 */ 188 vc4_flush_jobs_reading_resource(vc4, prsc); 189 } 190 } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { 191 /* If we're writing and the buffer is being used by the CL, we 192 * have to flush the CL first. If we're only reading, we need 193 * to flush if the CL has written our buffer. 194 */ 195 if (usage & PIPE_TRANSFER_WRITE) 196 vc4_flush_jobs_reading_resource(vc4, prsc); 197 else 198 vc4_flush_jobs_writing_resource(vc4, prsc); 199 } 200 201 if (usage & PIPE_TRANSFER_WRITE) { 202 rsc->writes++; 203 rsc->initialized_buffers = ~0; 204 } 205 206 trans = slab_alloc(&vc4->transfer_pool); 207 if (!trans) 208 return NULL; 209 210 /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */ 211 212 /* slab_alloc_st() doesn't zero: */ 213 memset(trans, 0, sizeof(*trans)); 214 ptrans = &trans->base; 215 216 pipe_resource_reference(&ptrans->resource, prsc); 217 ptrans->level = level; 218 ptrans->usage = usage; 219 ptrans->box = *box; 220 221 /* If the resource is multisampled, we need to resolve to single 222 * sample. This seems like it should be handled at a higher layer. 223 */ 224 if (prsc->nr_samples > 1) { 225 trans->ss_resource = vc4_get_temp_resource(pctx, prsc, box); 226 if (!trans->ss_resource) 227 goto fail; 228 assert(!trans->ss_resource->nr_samples); 229 230 /* The ptrans->box gets modified for tile alignment, so save 231 * the original box for unmap time. 232 */ 233 trans->ss_box = *box; 234 235 if (usage & PIPE_TRANSFER_READ) { 236 struct pipe_blit_info blit; 237 memset(&blit, 0, sizeof(blit)); 238 239 blit.src.resource = ptrans->resource; 240 blit.src.format = ptrans->resource->format; 241 blit.src.level = ptrans->level; 242 blit.src.box = trans->ss_box; 243 244 blit.dst.resource = trans->ss_resource; 245 blit.dst.format = trans->ss_resource->format; 246 blit.dst.box.width = trans->ss_box.width; 247 blit.dst.box.height = trans->ss_box.height; 248 blit.dst.box.depth = 1; 249 250 blit.mask = util_format_get_mask(prsc->format); 251 blit.filter = PIPE_TEX_FILTER_NEAREST; 252 253 pctx->blit(pctx, &blit); 254 vc4_flush_jobs_writing_resource(vc4, blit.dst.resource); 255 } 256 257 /* The rest of the mapping process should use our temporary. */ 258 prsc = trans->ss_resource; 259 rsc = vc4_resource(prsc); 260 ptrans->box.x = 0; 261 ptrans->box.y = 0; 262 ptrans->box.z = 0; 263 } 264 265 if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) 266 buf = vc4_bo_map_unsynchronized(rsc->bo); 267 else 268 buf = vc4_bo_map(rsc->bo); 269 if (!buf) { 270 fprintf(stderr, "Failed to map bo\n"); 271 goto fail; 272 } 273 274 *pptrans = ptrans; 275 276 struct vc4_resource_slice *slice = &rsc->slices[level]; 277 if (rsc->tiled) { 278 uint32_t utile_w = vc4_utile_width(rsc->cpp); 279 uint32_t utile_h = vc4_utile_height(rsc->cpp); 280 281 /* No direct mappings of tiled, since we need to manually 282 * tile/untile. 283 */ 284 if (usage & PIPE_TRANSFER_MAP_DIRECTLY) 285 return NULL; 286 287 if (format == PIPE_FORMAT_ETC1_RGB8) { 288 /* ETC1 is arranged as 64-bit blocks, where each block 289 * is 4x4 pixels. Texture tiling operates on the 290 * 64-bit block the way it would an uncompressed 291 * pixels. 292 */ 293 assert(!(ptrans->box.x & 3)); 294 assert(!(ptrans->box.y & 3)); 295 ptrans->box.x >>= 2; 296 ptrans->box.y >>= 2; 297 ptrans->box.width = (ptrans->box.width + 3) >> 2; 298 ptrans->box.height = (ptrans->box.height + 3) >> 2; 299 } 300 301 /* We need to align the box to utile boundaries, since that's 302 * what load/store operates on. This may cause us to need to 303 * read out the original contents in that border area. Right 304 * now we just read out the entire contents, including the 305 * middle area that will just get overwritten. 306 */ 307 uint32_t box_start_x = ptrans->box.x & (utile_w - 1); 308 uint32_t box_start_y = ptrans->box.y & (utile_h - 1); 309 bool needs_load = (usage & PIPE_TRANSFER_READ) != 0; 310 311 if (box_start_x) { 312 ptrans->box.width += box_start_x; 313 ptrans->box.x -= box_start_x; 314 needs_load = true; 315 } 316 if (box_start_y) { 317 ptrans->box.height += box_start_y; 318 ptrans->box.y -= box_start_y; 319 needs_load = true; 320 } 321 if (ptrans->box.width & (utile_w - 1)) { 322 /* We only need to force a load if our border region 323 * we're extending into is actually part of the 324 * texture. 325 */ 326 uint32_t slice_width = u_minify(prsc->width0, level); 327 if (ptrans->box.x + ptrans->box.width != slice_width) 328 needs_load = true; 329 ptrans->box.width = align(ptrans->box.width, utile_w); 330 } 331 if (ptrans->box.height & (utile_h - 1)) { 332 uint32_t slice_height = u_minify(prsc->height0, level); 333 if (ptrans->box.y + ptrans->box.height != slice_height) 334 needs_load = true; 335 ptrans->box.height = align(ptrans->box.height, utile_h); 336 } 337 338 ptrans->stride = ptrans->box.width * rsc->cpp; 339 ptrans->layer_stride = ptrans->stride * ptrans->box.height; 340 341 trans->map = malloc(ptrans->layer_stride * ptrans->box.depth); 342 343 if (needs_load) { 344 vc4_load_tiled_image(trans->map, ptrans->stride, 345 buf + slice->offset + 346 ptrans->box.z * rsc->cube_map_stride, 347 slice->stride, 348 slice->tiling, rsc->cpp, 349 &ptrans->box); 350 } 351 return (trans->map + 352 box_start_x * rsc->cpp + 353 box_start_y * ptrans->stride); 354 } else { 355 ptrans->stride = slice->stride; 356 ptrans->layer_stride = ptrans->stride; 357 358 return buf + slice->offset + 359 ptrans->box.y / util_format_get_blockheight(format) * ptrans->stride + 360 ptrans->box.x / util_format_get_blockwidth(format) * rsc->cpp + 361 ptrans->box.z * rsc->cube_map_stride; 362 } 363 364 365 fail: 366 vc4_resource_transfer_unmap(pctx, ptrans); 367 return NULL; 368 } 369 370 static void 371 vc4_resource_destroy(struct pipe_screen *pscreen, 372 struct pipe_resource *prsc) 373 { 374 struct vc4_screen *screen = vc4_screen(pscreen); 375 struct vc4_resource *rsc = vc4_resource(prsc); 376 vc4_bo_unreference(&rsc->bo); 377 378 if (rsc->scanout) 379 renderonly_scanout_destroy(rsc->scanout, screen->ro); 380 381 free(rsc); 382 } 383 384 static boolean 385 vc4_resource_get_handle(struct pipe_screen *pscreen, 386 struct pipe_context *pctx, 387 struct pipe_resource *prsc, 388 struct winsys_handle *whandle, 389 unsigned usage) 390 { 391 struct vc4_screen *screen = vc4_screen(pscreen); 392 struct vc4_resource *rsc = vc4_resource(prsc); 393 394 whandle->stride = rsc->slices[0].stride; 395 whandle->offset = 0; 396 397 /* If we're passing some reference to our BO out to some other part of 398 * the system, then we can't do any optimizations about only us being 399 * the ones seeing it (like BO caching or shadow update avoidance). 400 */ 401 rsc->bo->private = false; 402 403 if (rsc->tiled) 404 whandle->modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; 405 else 406 whandle->modifier = DRM_FORMAT_MOD_LINEAR; 407 408 switch (whandle->type) { 409 case DRM_API_HANDLE_TYPE_SHARED: 410 if (screen->ro) { 411 /* This could probably be supported, assuming that a 412 * control node was used for pl111. 413 */ 414 fprintf(stderr, "flink unsupported with pl111\n"); 415 return FALSE; 416 } 417 418 return vc4_bo_flink(rsc->bo, &whandle->handle); 419 case DRM_API_HANDLE_TYPE_KMS: 420 if (screen->ro && renderonly_get_handle(rsc->scanout, whandle)) 421 return TRUE; 422 whandle->handle = rsc->bo->handle; 423 return TRUE; 424 case DRM_API_HANDLE_TYPE_FD: 425 /* FDs are cross-device, so we can export directly from vc4. 426 */ 427 whandle->handle = vc4_bo_get_dmabuf(rsc->bo); 428 return whandle->handle != -1; 429 } 430 431 return FALSE; 432 } 433 434 static void 435 vc4_setup_slices(struct vc4_resource *rsc, const char *caller) 436 { 437 struct pipe_resource *prsc = &rsc->base; 438 uint32_t width = prsc->width0; 439 uint32_t height = prsc->height0; 440 if (prsc->format == PIPE_FORMAT_ETC1_RGB8) { 441 width = (width + 3) >> 2; 442 height = (height + 3) >> 2; 443 } 444 445 uint32_t pot_width = util_next_power_of_two(width); 446 uint32_t pot_height = util_next_power_of_two(height); 447 uint32_t offset = 0; 448 uint32_t utile_w = vc4_utile_width(rsc->cpp); 449 uint32_t utile_h = vc4_utile_height(rsc->cpp); 450 451 for (int i = prsc->last_level; i >= 0; i--) { 452 struct vc4_resource_slice *slice = &rsc->slices[i]; 453 454 uint32_t level_width, level_height; 455 if (i == 0) { 456 level_width = width; 457 level_height = height; 458 } else { 459 level_width = u_minify(pot_width, i); 460 level_height = u_minify(pot_height, i); 461 } 462 463 if (!rsc->tiled) { 464 slice->tiling = VC4_TILING_FORMAT_LINEAR; 465 if (prsc->nr_samples > 1) { 466 /* MSAA (4x) surfaces are stored as raw tile buffer contents. */ 467 level_width = align(level_width, 32); 468 level_height = align(level_height, 32); 469 } else { 470 level_width = align(level_width, utile_w); 471 } 472 } else { 473 if (vc4_size_is_lt(level_width, level_height, 474 rsc->cpp)) { 475 slice->tiling = VC4_TILING_FORMAT_LT; 476 level_width = align(level_width, utile_w); 477 level_height = align(level_height, utile_h); 478 } else { 479 slice->tiling = VC4_TILING_FORMAT_T; 480 level_width = align(level_width, 481 4 * 2 * utile_w); 482 level_height = align(level_height, 483 4 * 2 * utile_h); 484 } 485 } 486 487 slice->offset = offset; 488 slice->stride = (level_width * rsc->cpp * 489 MAX2(prsc->nr_samples, 1)); 490 slice->size = level_height * slice->stride; 491 492 offset += slice->size; 493 494 if (vc4_debug & VC4_DEBUG_SURFACE) { 495 static const char tiling_chars[] = { 496 [VC4_TILING_FORMAT_LINEAR] = 'R', 497 [VC4_TILING_FORMAT_LT] = 'L', 498 [VC4_TILING_FORMAT_T] = 'T' 499 }; 500 fprintf(stderr, 501 "rsc %s %p (format %s: vc4 %d), %dx%d: " 502 "level %d (%c) -> %dx%d, stride %d@0x%08x\n", 503 caller, rsc, 504 util_format_short_name(prsc->format), 505 rsc->vc4_format, 506 prsc->width0, prsc->height0, 507 i, tiling_chars[slice->tiling], 508 level_width, level_height, 509 slice->stride, slice->offset); 510 } 511 } 512 513 /* The texture base pointer that has to point to level 0 doesn't have 514 * intra-page bits, so we have to align it, and thus shift up all the 515 * smaller slices. 516 */ 517 uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) - 518 rsc->slices[0].offset); 519 if (page_align_offset) { 520 for (int i = 0; i <= prsc->last_level; i++) 521 rsc->slices[i].offset += page_align_offset; 522 } 523 524 /* Cube map faces appear as whole miptrees at a page-aligned offset 525 * from the first face's miptree. 526 */ 527 if (prsc->target == PIPE_TEXTURE_CUBE) { 528 rsc->cube_map_stride = align(rsc->slices[0].offset + 529 rsc->slices[0].size, 4096); 530 } 531 } 532 533 static struct vc4_resource * 534 vc4_resource_setup(struct pipe_screen *pscreen, 535 const struct pipe_resource *tmpl) 536 { 537 struct vc4_resource *rsc = CALLOC_STRUCT(vc4_resource); 538 if (!rsc) 539 return NULL; 540 struct pipe_resource *prsc = &rsc->base; 541 542 *prsc = *tmpl; 543 544 pipe_reference_init(&prsc->reference, 1); 545 prsc->screen = pscreen; 546 547 if (prsc->nr_samples <= 1) 548 rsc->cpp = util_format_get_blocksize(tmpl->format); 549 else 550 rsc->cpp = sizeof(uint32_t); 551 552 assert(rsc->cpp); 553 554 return rsc; 555 } 556 557 static enum vc4_texture_data_type 558 get_resource_texture_format(struct pipe_resource *prsc) 559 { 560 struct vc4_resource *rsc = vc4_resource(prsc); 561 uint8_t format = vc4_get_tex_format(prsc->format); 562 563 if (!rsc->tiled) { 564 if (prsc->nr_samples > 1) { 565 return ~0; 566 } else { 567 assert(format == VC4_TEXTURE_TYPE_RGBA8888); 568 return VC4_TEXTURE_TYPE_RGBA32R; 569 } 570 } 571 572 return format; 573 } 574 575 static bool 576 find_modifier(uint64_t needle, const uint64_t *haystack, int count) 577 { 578 int i; 579 580 for (i = 0; i < count; i++) { 581 if (haystack[i] == needle) 582 return true; 583 } 584 585 return false; 586 } 587 588 static struct pipe_resource * 589 vc4_resource_create_with_modifiers(struct pipe_screen *pscreen, 590 const struct pipe_resource *tmpl, 591 const uint64_t *modifiers, 592 int count) 593 { 594 struct vc4_screen *screen = vc4_screen(pscreen); 595 struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl); 596 struct pipe_resource *prsc = &rsc->base; 597 bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); 598 /* Use a tiled layout if we can, for better 3D performance. */ 599 bool should_tile = true; 600 601 /* VBOs/PBOs are untiled (and 1 height). */ 602 if (tmpl->target == PIPE_BUFFER) 603 should_tile = false; 604 605 /* MSAA buffers are linear. */ 606 if (tmpl->nr_samples > 1) 607 should_tile = false; 608 609 /* No tiling when we're sharing with another device (pl111). */ 610 if (screen->ro && (tmpl->bind & PIPE_BIND_SCANOUT)) 611 should_tile = false; 612 613 /* Cursors are always linear, and the user can request linear as well. 614 */ 615 if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR)) 616 should_tile = false; 617 618 /* No shared objects with LT format -- the kernel only has T-format 619 * metadata. LT objects are small enough it's not worth the trouble to 620 * give them metadata to tile. 621 */ 622 if ((tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) && 623 vc4_size_is_lt(prsc->width0, prsc->height0, rsc->cpp)) 624 should_tile = false; 625 626 /* If we're sharing or scanning out, we need the ioctl present to 627 * inform the kernel or the other side. 628 */ 629 if ((tmpl->bind & (PIPE_BIND_SHARED | 630 PIPE_BIND_SCANOUT)) && !screen->has_tiling_ioctl) 631 should_tile = false; 632 633 /* No user-specified modifier; determine our own. */ 634 if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) { 635 linear_ok = true; 636 rsc->tiled = should_tile; 637 } else if (should_tile && 638 find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, 639 modifiers, count)) { 640 rsc->tiled = true; 641 } else if (linear_ok) { 642 rsc->tiled = false; 643 } else { 644 fprintf(stderr, "Unsupported modifier requested\n"); 645 return NULL; 646 } 647 648 if (tmpl->target != PIPE_BUFFER) 649 rsc->vc4_format = get_resource_texture_format(prsc); 650 651 vc4_setup_slices(rsc, "create"); 652 if (!vc4_resource_bo_alloc(rsc)) 653 goto fail; 654 655 if (screen->has_tiling_ioctl) { 656 uint64_t modifier; 657 if (rsc->tiled) 658 modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; 659 else 660 modifier = DRM_FORMAT_MOD_LINEAR; 661 struct drm_vc4_set_tiling set_tiling = { 662 .handle = rsc->bo->handle, 663 .modifier = modifier, 664 }; 665 int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_SET_TILING, 666 &set_tiling); 667 if (ret != 0) 668 goto fail; 669 } 670 671 if (screen->ro && tmpl->bind & PIPE_BIND_SCANOUT) { 672 rsc->scanout = 673 renderonly_scanout_for_resource(prsc, screen->ro, NULL); 674 if (!rsc->scanout) 675 goto fail; 676 } 677 678 vc4_bo_label(screen, rsc->bo, "%sresource %dx%d@%d/%d", 679 (tmpl->bind & PIPE_BIND_SCANOUT) ? "scanout " : "", 680 tmpl->width0, tmpl->height0, 681 rsc->cpp * 8, prsc->last_level); 682 683 return prsc; 684 fail: 685 vc4_resource_destroy(pscreen, prsc); 686 return NULL; 687 } 688 689 struct pipe_resource * 690 vc4_resource_create(struct pipe_screen *pscreen, 691 const struct pipe_resource *tmpl) 692 { 693 const uint64_t mod = DRM_FORMAT_MOD_INVALID; 694 return vc4_resource_create_with_modifiers(pscreen, tmpl, &mod, 1); 695 } 696 697 static struct pipe_resource * 698 vc4_resource_from_handle(struct pipe_screen *pscreen, 699 const struct pipe_resource *tmpl, 700 struct winsys_handle *whandle, 701 unsigned usage) 702 { 703 struct vc4_screen *screen = vc4_screen(pscreen); 704 struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl); 705 struct pipe_resource *prsc = &rsc->base; 706 struct vc4_resource_slice *slice = &rsc->slices[0]; 707 708 if (!rsc) 709 return NULL; 710 711 if (whandle->offset != 0) { 712 fprintf(stderr, 713 "Attempt to import unsupported winsys offset %u\n", 714 whandle->offset); 715 return NULL; 716 } 717 718 switch (whandle->type) { 719 case DRM_API_HANDLE_TYPE_SHARED: 720 rsc->bo = vc4_bo_open_name(screen, 721 whandle->handle, whandle->stride); 722 break; 723 case DRM_API_HANDLE_TYPE_FD: 724 rsc->bo = vc4_bo_open_dmabuf(screen, 725 whandle->handle, whandle->stride); 726 break; 727 default: 728 fprintf(stderr, 729 "Attempt to import unsupported handle type %d\n", 730 whandle->type); 731 } 732 733 if (!rsc->bo) 734 goto fail; 735 736 struct drm_vc4_get_tiling get_tiling = { 737 .handle = rsc->bo->handle, 738 }; 739 int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_TILING, &get_tiling); 740 741 if (ret != 0) { 742 whandle->modifier = DRM_FORMAT_MOD_LINEAR; 743 } else if (whandle->modifier == DRM_FORMAT_MOD_INVALID) { 744 whandle->modifier = get_tiling.modifier; 745 } else if (whandle->modifier != get_tiling.modifier) { 746 fprintf(stderr, 747 "Modifier 0x%llx vs. tiling (0x%llx) mismatch\n", 748 (long long)whandle->modifier, get_tiling.modifier); 749 goto fail; 750 } 751 752 switch (whandle->modifier) { 753 case DRM_FORMAT_MOD_LINEAR: 754 rsc->tiled = false; 755 break; 756 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: 757 rsc->tiled = true; 758 break; 759 default: 760 fprintf(stderr, 761 "Attempt to import unsupported modifier 0x%llx\n", 762 (long long)whandle->modifier); 763 goto fail; 764 } 765 766 rsc->vc4_format = get_resource_texture_format(prsc); 767 vc4_setup_slices(rsc, "import"); 768 769 if (screen->ro) { 770 /* Make sure that renderonly has a handle to our buffer in the 771 * display's fd, so that a later renderonly_get_handle() 772 * returns correct handles or GEM names. 773 */ 774 rsc->scanout = 775 renderonly_create_gpu_import_for_resource(prsc, 776 screen->ro, 777 NULL); 778 if (!rsc->scanout) 779 goto fail; 780 } 781 782 if (whandle->stride != slice->stride) { 783 static bool warned = false; 784 if (!warned) { 785 warned = true; 786 fprintf(stderr, 787 "Attempting to import %dx%d %s with " 788 "unsupported stride %d instead of %d\n", 789 prsc->width0, prsc->height0, 790 util_format_short_name(prsc->format), 791 whandle->stride, 792 slice->stride); 793 } 794 goto fail; 795 } 796 797 return prsc; 798 799 fail: 800 vc4_resource_destroy(pscreen, prsc); 801 return NULL; 802 } 803 804 static struct pipe_surface * 805 vc4_create_surface(struct pipe_context *pctx, 806 struct pipe_resource *ptex, 807 const struct pipe_surface *surf_tmpl) 808 { 809 struct vc4_surface *surface = CALLOC_STRUCT(vc4_surface); 810 struct vc4_resource *rsc = vc4_resource(ptex); 811 812 if (!surface) 813 return NULL; 814 815 assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); 816 817 struct pipe_surface *psurf = &surface->base; 818 unsigned level = surf_tmpl->u.tex.level; 819 820 pipe_reference_init(&psurf->reference, 1); 821 pipe_resource_reference(&psurf->texture, ptex); 822 823 psurf->context = pctx; 824 psurf->format = surf_tmpl->format; 825 psurf->width = u_minify(ptex->width0, level); 826 psurf->height = u_minify(ptex->height0, level); 827 psurf->u.tex.level = level; 828 psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; 829 psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; 830 surface->offset = (rsc->slices[level].offset + 831 psurf->u.tex.first_layer * rsc->cube_map_stride); 832 surface->tiling = rsc->slices[level].tiling; 833 834 return &surface->base; 835 } 836 837 static void 838 vc4_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf) 839 { 840 pipe_resource_reference(&psurf->texture, NULL); 841 FREE(psurf); 842 } 843 844 static void 845 vc4_dump_surface_non_msaa(struct pipe_surface *psurf) 846 { 847 struct pipe_resource *prsc = psurf->texture; 848 struct vc4_resource *rsc = vc4_resource(prsc); 849 uint32_t *map = vc4_bo_map(rsc->bo); 850 uint32_t stride = rsc->slices[0].stride / 4; 851 uint32_t width = psurf->width; 852 uint32_t height = psurf->height; 853 uint32_t chunk_w = width / 79; 854 uint32_t chunk_h = height / 40; 855 uint32_t found_colors[10]; 856 uint32_t num_found_colors = 0; 857 858 if (rsc->vc4_format != VC4_TEXTURE_TYPE_RGBA32R) { 859 fprintf(stderr, "%s: Unsupported format %s\n", 860 __func__, util_format_short_name(psurf->format)); 861 return; 862 } 863 864 for (int by = 0; by < height; by += chunk_h) { 865 for (int bx = 0; bx < width; bx += chunk_w) { 866 int all_found_color = -1; /* nothing found */ 867 868 for (int y = by; y < MIN2(height, by + chunk_h); y++) { 869 for (int x = bx; x < MIN2(width, bx + chunk_w); x++) { 870 uint32_t pix = map[y * stride + x]; 871 872 int i; 873 for (i = 0; i < num_found_colors; i++) { 874 if (pix == found_colors[i]) 875 break; 876 } 877 if (i == num_found_colors && 878 num_found_colors < 879 ARRAY_SIZE(found_colors)) { 880 found_colors[num_found_colors++] = pix; 881 } 882 883 if (i < num_found_colors) { 884 if (all_found_color == -1) 885 all_found_color = i; 886 else if (i != all_found_color) 887 all_found_color = ARRAY_SIZE(found_colors); 888 } 889 } 890 } 891 /* If all pixels for this chunk have a consistent 892 * value, then print a character for it. Either a 893 * fixed name (particularly common for piglit tests), 894 * or a runtime-generated number. 895 */ 896 if (all_found_color >= 0 && 897 all_found_color < ARRAY_SIZE(found_colors)) { 898 static const struct { 899 uint32_t val; 900 const char *c; 901 } named_colors[] = { 902 { 0xff000000, "" }, 903 { 0x00000000, "" }, 904 { 0xffff0000, "r" }, 905 { 0xff00ff00, "g" }, 906 { 0xff0000ff, "b" }, 907 { 0xffffffff, "w" }, 908 }; 909 int i; 910 for (i = 0; i < ARRAY_SIZE(named_colors); i++) { 911 if (named_colors[i].val == 912 found_colors[all_found_color]) { 913 fprintf(stderr, "%s", 914 named_colors[i].c); 915 break; 916 } 917 } 918 /* For unnamed colors, print a number and the 919 * numbers will have values printed at the 920 * end. 921 */ 922 if (i == ARRAY_SIZE(named_colors)) { 923 fprintf(stderr, "%c", 924 '0' + all_found_color); 925 } 926 } else { 927 /* If there's no consistent color, print this. 928 */ 929 fprintf(stderr, "."); 930 } 931 } 932 fprintf(stderr, "\n"); 933 } 934 935 for (int i = 0; i < num_found_colors; i++) { 936 fprintf(stderr, "color %d: 0x%08x\n", i, found_colors[i]); 937 } 938 } 939 940 static uint32_t 941 vc4_surface_msaa_get_sample(struct pipe_surface *psurf, 942 uint32_t x, uint32_t y, uint32_t sample) 943 { 944 struct pipe_resource *prsc = psurf->texture; 945 struct vc4_resource *rsc = vc4_resource(prsc); 946 uint32_t tile_w = 32, tile_h = 32; 947 uint32_t tiles_w = DIV_ROUND_UP(psurf->width, 32); 948 949 uint32_t tile_x = x / tile_w; 950 uint32_t tile_y = y / tile_h; 951 uint32_t *tile = (vc4_bo_map(rsc->bo) + 952 VC4_TILE_BUFFER_SIZE * (tile_y * tiles_w + tile_x)); 953 uint32_t subtile_x = x % tile_w; 954 uint32_t subtile_y = y % tile_h; 955 956 uint32_t quad_samples = VC4_MAX_SAMPLES * 4; 957 uint32_t tile_stride = quad_samples * tile_w / 2; 958 959 return *((uint32_t *)tile + 960 (subtile_y >> 1) * tile_stride + 961 (subtile_x >> 1) * quad_samples + 962 ((subtile_y & 1) << 1) + 963 (subtile_x & 1) + 964 sample); 965 } 966 967 static void 968 vc4_dump_surface_msaa_char(struct pipe_surface *psurf, 969 uint32_t start_x, uint32_t start_y, 970 uint32_t w, uint32_t h) 971 { 972 bool all_same_color = true; 973 uint32_t all_pix = 0; 974 975 for (int y = start_y; y < start_y + h; y++) { 976 for (int x = start_x; x < start_x + w; x++) { 977 for (int s = 0; s < VC4_MAX_SAMPLES; s++) { 978 uint32_t pix = vc4_surface_msaa_get_sample(psurf, 979 x, y, 980 s); 981 if (x == start_x && y == start_y) 982 all_pix = pix; 983 else if (all_pix != pix) 984 all_same_color = false; 985 } 986 } 987 } 988 if (all_same_color) { 989 static const struct { 990 uint32_t val; 991 const char *c; 992 } named_colors[] = { 993 { 0xff000000, "" }, 994 { 0x00000000, "" }, 995 { 0xffff0000, "r" }, 996 { 0xff00ff00, "g" }, 997 { 0xff0000ff, "b" }, 998 { 0xffffffff, "w" }, 999 }; 1000 int i; 1001 for (i = 0; i < ARRAY_SIZE(named_colors); i++) { 1002 if (named_colors[i].val == all_pix) { 1003 fprintf(stderr, "%s", 1004 named_colors[i].c); 1005 return; 1006 } 1007 } 1008 fprintf(stderr, "x"); 1009 } else { 1010 fprintf(stderr, "."); 1011 } 1012 } 1013 1014 static void 1015 vc4_dump_surface_msaa(struct pipe_surface *psurf) 1016 { 1017 uint32_t tile_w = 32, tile_h = 32; 1018 uint32_t tiles_w = DIV_ROUND_UP(psurf->width, tile_w); 1019 uint32_t tiles_h = DIV_ROUND_UP(psurf->height, tile_h); 1020 uint32_t char_w = 140, char_h = 60; 1021 uint32_t char_w_per_tile = char_w / tiles_w - 1; 1022 uint32_t char_h_per_tile = char_h / tiles_h - 1; 1023 1024 fprintf(stderr, "Surface: %dx%d (%dx MSAA)\n", 1025 psurf->width, psurf->height, psurf->texture->nr_samples); 1026 1027 for (int x = 0; x < (char_w_per_tile + 1) * tiles_w; x++) 1028 fprintf(stderr, "-"); 1029 fprintf(stderr, "\n"); 1030 1031 for (int ty = 0; ty < psurf->height; ty += tile_h) { 1032 for (int y = 0; y < char_h_per_tile; y++) { 1033 1034 for (int tx = 0; tx < psurf->width; tx += tile_w) { 1035 for (int x = 0; x < char_w_per_tile; x++) { 1036 uint32_t bx1 = (x * tile_w / 1037 char_w_per_tile); 1038 uint32_t bx2 = ((x + 1) * tile_w / 1039 char_w_per_tile); 1040 uint32_t by1 = (y * tile_h / 1041 char_h_per_tile); 1042 uint32_t by2 = ((y + 1) * tile_h / 1043 char_h_per_tile); 1044 1045 vc4_dump_surface_msaa_char(psurf, 1046 tx + bx1, 1047 ty + by1, 1048 bx2 - bx1, 1049 by2 - by1); 1050 } 1051 fprintf(stderr, "|"); 1052 } 1053 fprintf(stderr, "\n"); 1054 } 1055 1056 for (int x = 0; x < (char_w_per_tile + 1) * tiles_w; x++) 1057 fprintf(stderr, "-"); 1058 fprintf(stderr, "\n"); 1059 } 1060 } 1061 1062 /** Debug routine to dump the contents of an 8888 surface to the console */ 1063 void 1064 vc4_dump_surface(struct pipe_surface *psurf) 1065 { 1066 if (!psurf) 1067 return; 1068 1069 if (psurf->texture->nr_samples > 1) 1070 vc4_dump_surface_msaa(psurf); 1071 else 1072 vc4_dump_surface_non_msaa(psurf); 1073 } 1074 1075 static void 1076 vc4_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource) 1077 { 1078 /* All calls to flush_resource are followed by a flush of the context, 1079 * so there's nothing to do. 1080 */ 1081 } 1082 1083 void 1084 vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, 1085 struct pipe_sampler_view *pview) 1086 { 1087 struct vc4_sampler_view *view = vc4_sampler_view(pview); 1088 struct vc4_resource *shadow = vc4_resource(view->texture); 1089 struct vc4_resource *orig = vc4_resource(pview->texture); 1090 1091 assert(view->texture != pview->texture); 1092 1093 if (shadow->writes == orig->writes && orig->bo->private) 1094 return; 1095 1096 perf_debug("Updating %dx%d@%d shadow texture due to %s\n", 1097 orig->base.width0, orig->base.height0, 1098 pview->u.tex.first_level, 1099 pview->u.tex.first_level ? "base level" : "raster layout"); 1100 1101 for (int i = 0; i <= shadow->base.last_level; i++) { 1102 unsigned width = u_minify(shadow->base.width0, i); 1103 unsigned height = u_minify(shadow->base.height0, i); 1104 struct pipe_blit_info info = { 1105 .dst = { 1106 .resource = &shadow->base, 1107 .level = i, 1108 .box = { 1109 .x = 0, 1110 .y = 0, 1111 .z = 0, 1112 .width = width, 1113 .height = height, 1114 .depth = 1, 1115 }, 1116 .format = shadow->base.format, 1117 }, 1118 .src = { 1119 .resource = &orig->base, 1120 .level = pview->u.tex.first_level + i, 1121 .box = { 1122 .x = 0, 1123 .y = 0, 1124 .z = 0, 1125 .width = width, 1126 .height = height, 1127 .depth = 1, 1128 }, 1129 .format = orig->base.format, 1130 }, 1131 .mask = ~0, 1132 }; 1133 pctx->blit(pctx, &info); 1134 } 1135 1136 shadow->writes = orig->writes; 1137 } 1138 1139 /** 1140 * Converts a 4-byte index buffer to 2 bytes. 1141 * 1142 * Since GLES2 only has support for 1 and 2-byte indices, the hardware doesn't 1143 * include 4-byte index support, and we have to shrink it down. 1144 * 1145 * There's no fallback support for when indices end up being larger than 2^16, 1146 * though it will at least assertion fail. Also, if the original index data 1147 * was in user memory, it would be nice to not have uploaded it to a VBO 1148 * before translating. 1149 */ 1150 struct pipe_resource * 1151 vc4_get_shadow_index_buffer(struct pipe_context *pctx, 1152 const struct pipe_draw_info *info, 1153 uint32_t offset, 1154 uint32_t count, 1155 uint32_t *shadow_offset) 1156 { 1157 struct vc4_context *vc4 = vc4_context(pctx); 1158 struct vc4_resource *orig = vc4_resource(info->index.resource); 1159 perf_debug("Fallback conversion for %d uint indices\n", count); 1160 1161 void *data; 1162 struct pipe_resource *shadow_rsc = NULL; 1163 u_upload_alloc(vc4->uploader, 0, count * 2, 4, 1164 shadow_offset, &shadow_rsc, &data); 1165 uint16_t *dst = data; 1166 1167 struct pipe_transfer *src_transfer = NULL; 1168 const uint32_t *src; 1169 if (info->has_user_indices) { 1170 src = info->index.user; 1171 } else { 1172 src = pipe_buffer_map_range(pctx, &orig->base, 1173 offset, 1174 count * 4, 1175 PIPE_TRANSFER_READ, &src_transfer); 1176 } 1177 1178 for (int i = 0; i < count; i++) { 1179 uint32_t src_index = src[i]; 1180 assert(src_index <= 0xffff); 1181 dst[i] = src_index; 1182 } 1183 1184 if (src_transfer) 1185 pctx->transfer_unmap(pctx, src_transfer); 1186 1187 return shadow_rsc; 1188 } 1189 1190 void 1191 vc4_resource_screen_init(struct pipe_screen *pscreen) 1192 { 1193 struct vc4_screen *screen = vc4_screen(pscreen); 1194 1195 pscreen->resource_create = vc4_resource_create; 1196 pscreen->resource_create_with_modifiers = 1197 vc4_resource_create_with_modifiers; 1198 pscreen->resource_from_handle = vc4_resource_from_handle; 1199 pscreen->resource_destroy = u_resource_destroy_vtbl; 1200 pscreen->resource_get_handle = vc4_resource_get_handle; 1201 pscreen->resource_destroy = vc4_resource_destroy; 1202 1203 /* Test if the kernel has GET_TILING; it will return -EINVAL if the 1204 * ioctl does not exist, but -ENOENT if we pass an impossible handle. 1205 * 0 cannot be a valid GEM object, so use that. 1206 */ 1207 struct drm_vc4_get_tiling get_tiling = { 1208 .handle = 0x0, 1209 }; 1210 int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_TILING, &get_tiling); 1211 if (ret == -1 && errno == ENOENT) 1212 screen->has_tiling_ioctl = true; 1213 } 1214 1215 void 1216 vc4_resource_context_init(struct pipe_context *pctx) 1217 { 1218 pctx->transfer_map = vc4_resource_transfer_map; 1219 pctx->transfer_flush_region = u_default_transfer_flush_region; 1220 pctx->transfer_unmap = vc4_resource_transfer_unmap; 1221 pctx->buffer_subdata = u_default_buffer_subdata; 1222 pctx->texture_subdata = u_default_texture_subdata; 1223 pctx->create_surface = vc4_create_surface; 1224 pctx->surface_destroy = vc4_surface_destroy; 1225 pctx->resource_copy_region = util_resource_copy_region; 1226 pctx->blit = vc4_blit; 1227 pctx->flush_resource = vc4_flush_resource; 1228 } 1229