/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include "util/u_surface.h"
#include "util/u_transfer.h"
#include "util/u_format_etc.h"

#include "ilo_blit.h"
#include "ilo_blitter.h"
#include "ilo_cp.h"
#include "ilo_context.h"
#include "ilo_resource.h"
#include "ilo_state.h"
#include "ilo_transfer.h"

/*
 * For buffers that are not busy, we want to map/unmap them directly.  For
 * those that are busy, we have to worry about synchronization.  We could
 * wait for the GPU to finish, but there are cases where we can avoid
 * waiting.
 *
 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
 *   buffer can be discarded.  We can replace the backing bo with a new one
 *   of the same size (renaming).
 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
 *   range can be discarded.  We can allocate and map a staging bo on
 *   mapping, and (pipelined-)copy it over to the real bo on unmapping.
 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
 *   flushed regions need to be written.  We can still allocate and map a
 *   staging bo, but should copy only the flushed regions over.
 *
 * However, there are other flags to consider.
 *
 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
 *   synchronization at all on mapping.
 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
 * - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer
 *   while it is mapped.  Synchronization is done by defining memory
 *   barriers, explicitly via memory_barrier() or implicitly via
 *   transfer_flush_region(), as well as GPU fences.
 * - When PIPE_TRANSFER_COHERENT is set, updates by either the CPU or the GPU
 *   should be made visible to the other side immediately.  Since the kernel
 *   flushes GPU caches at the end of each batch buffer, the CPU always sees
 *   GPU updates.  We could use a coherent mapping to make all persistent
 *   mappings coherent.
 *
 * These also apply to textures, except that we may additionally need to do
 * format conversion or tiling/untiling.
 */
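
/*
 * For example, an unsynchronized write to a busy linear buffer on a non-LLC
 * platform ends up mapping the bo with intel_bo_map_gtt_async(), while a
 * synchronized write with PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE set avoids
 * blocking by renaming the bo instead.  The decisions are made in
 * resource_get_transfer_method() and xfer_unblock() below.
 */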

/**
 * Return a transfer method suitable for the usage.  The returned method will
 * correctly block when the resource is busy.
 */
static bool
resource_get_transfer_method(struct pipe_resource *res,
                             const struct pipe_transfer *transfer,
                             enum ilo_transfer_map_method *method)
{
   const struct ilo_screen *is = ilo_screen(res->screen);
   const unsigned usage = transfer->usage;
   enum ilo_transfer_map_method m;
   bool tiled;

   if (res->target == PIPE_BUFFER) {
      tiled = false;
   } else {
      struct ilo_texture *tex = ilo_texture(res);
      bool need_convert = false;

      /* we may need to convert on the fly */
      if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
         /* on GEN6, separate stencil is enabled only when HiZ is */
         if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
             ilo_image_can_enable_aux(&tex->image, transfer->level)) {
            m = ILO_TRANSFER_MAP_SW_ZS;
            need_convert = true;
         }
      } else if (tex->image_format != tex->base.format) {
         m = ILO_TRANSFER_MAP_SW_CONVERT;
         need_convert = true;
      }

      if (need_convert) {
         if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
            return false;

         *method = m;
         return true;
      }

      tiled = (tex->image.tiling != GEN6_TILING_NONE);
   }

   if (tiled)
      m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
   else if (is->dev.has_llc)
      m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
   else if (usage & PIPE_TRANSFER_PERSISTENT)
      m = ILO_TRANSFER_MAP_GTT; /* for coherency */
   else if (usage & PIPE_TRANSFER_READ)
      m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
   else
      m = ILO_TRANSFER_MAP_GTT;

   *method = m;

   return true;
}

/**
 * Return true if usage allows the use of staging bo to avoid blocking.
 */
static bool
usage_allows_staging_bo(unsigned usage)
{
   /* do we know how to write the data back to the resource? */
   const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
                                   PIPE_TRANSFER_DISCARD_RANGE |
                                   PIPE_TRANSFER_FLUSH_EXPLICIT);
   const unsigned reasons_against = (PIPE_TRANSFER_READ |
                                     PIPE_TRANSFER_MAP_DIRECTLY |
                                     PIPE_TRANSFER_PERSISTENT);

   return (usage & can_writeback) && !(usage & reasons_against);
}
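
/*
 * For example, (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE) allows a
 * staging bo, because the mapped range can be written back with a pipelined
 * copy.  Any usage that includes PIPE_TRANSFER_READ does not, as the staging
 * bo would not hold the current contents of the resource.
 */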

/**
 * Allocate the staging resource.  It is always linear and its size matches
 * the transfer box, with proper padding.
 */
static bool
xfer_alloc_staging_res(struct ilo_transfer *xfer)
{
   const struct pipe_resource *res = xfer->base.resource;
   const struct pipe_box *box = &xfer->base.box;
   struct pipe_resource templ;

   memset(&templ, 0, sizeof(templ));

   templ.format = res->format;

   if (res->target == PIPE_BUFFER) {
      templ.target = PIPE_BUFFER;
      templ.width0 =
         (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
   }
   else {
      /* use 2D array for any texture target */
      templ.target = PIPE_TEXTURE_2D_ARRAY;
      templ.width0 = box->width;
   }

   templ.height0 = box->height;
   templ.depth0 = 1;
   templ.array_size = box->depth;
   templ.nr_samples = 1;
   templ.usage = PIPE_USAGE_STAGING;

   if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
      templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                    PIPE_RESOURCE_FLAG_MAP_COHERENT;
   }

   xfer->staging.res = res->screen->resource_create(res->screen, &templ);

   if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
      assert(ilo_texture(xfer->staging.res)->image.tiling ==
             GEN6_TILING_NONE);
   }

   return (xfer->staging.res != NULL);
}

/**
 * Use an alternative transfer method or rename the resource to unblock an
 * otherwise blocking transfer.
 */
static bool
xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
{
   struct pipe_resource *res = xfer->base.resource;
   bool unblocked = false, renamed = false;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
      if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
         unblocked = true;
      }
      else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
               ilo_resource_rename_bo(res)) {
         renamed = true;
         unblocked = true;
      }
      else if (usage_allows_staging_bo(xfer->base.usage) &&
               xfer_alloc_staging_res(xfer)) {
         xfer->method = ILO_TRANSFER_MAP_STAGING;
         unblocked = true;
      }
      break;
   case ILO_TRANSFER_MAP_GTT_ASYNC:
   case ILO_TRANSFER_MAP_STAGING:
      unblocked = true;
      break;
   default:
      break;
   }

   *resource_renamed = renamed;

   return unblocked;
}

/**
 * Allocate the staging system buffer based on the resource format and the
 * transfer box.
 */
static bool
xfer_alloc_staging_sys(struct ilo_transfer *xfer)
{
   const enum pipe_format format = xfer->base.resource->format;
   const struct pipe_box *box = &xfer->base.box;
   const unsigned alignment = 64;

   /* need to tell the world the layout */
   xfer->base.stride =
      align(util_format_get_stride(format, box->width), alignment);
   xfer->base.layer_stride =
      util_format_get_2d_size(format, xfer->base.stride, box->height);

   xfer->staging.sys =
      align_malloc(xfer->base.layer_stride * box->depth, alignment);

   return (xfer->staging.sys != NULL);
}
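
/*
 * As an example of the layout above, mapping a 100x30x2 box of a
 * PIPE_FORMAT_Z24_UNORM_S8_UINT texture (4 bytes per texel) gives
 *
 *    stride       = align(100 * 4, 64) = 448
 *    layer_stride = 448 * 30           = 13440
 *
 * and a staging system buffer of 13440 * 2 = 26880 bytes.
 */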

/**
 * Map according to the method.  The staging system buffer should have been
 * allocated if the method requires it.
 */
static void *
xfer_map(struct ilo_transfer *xfer)
{
   const struct ilo_vma *vma;
   void *ptr;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
      vma = ilo_resource_get_vma(xfer->base.resource);
      ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE);
      break;
   case ILO_TRANSFER_MAP_GTT:
      vma = ilo_resource_get_vma(xfer->base.resource);
      ptr = intel_bo_map_gtt(vma->bo);
      break;
   case ILO_TRANSFER_MAP_GTT_ASYNC:
      vma = ilo_resource_get_vma(xfer->base.resource);
      ptr = intel_bo_map_gtt_async(vma->bo);
      break;
   case ILO_TRANSFER_MAP_STAGING:
      {
         const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);

         vma = ilo_resource_get_vma(xfer->staging.res);

         /*
          * We want a writable, optionally persistent and coherent, mapping
          * for a linear bo.  We can call resource_get_transfer_method(), but
          * this turns out to be fairly simple.
          */
         if (is->dev.has_llc)
            ptr = intel_bo_map(vma->bo, true);
         else
            ptr = intel_bo_map_gtt(vma->bo);

         if (ptr && xfer->staging.res->target == PIPE_BUFFER)
            ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
      }
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      vma = NULL;
      ptr = xfer->staging.sys;
      break;
   default:
      assert(!"unknown mapping method");
      vma = NULL;
      ptr = NULL;
      break;
   }

   if (ptr && vma)
      ptr = (void *) ((char *) ptr + vma->bo_offset);

   return ptr;
}

/**
 * Unmap a transfer.
 */
static void
xfer_unmap(struct ilo_transfer *xfer)
{
   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_GTT_ASYNC:
      intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo);
      break;
   case ILO_TRANSFER_MAP_STAGING:
      intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo);
      break;
   default:
      break;
   }
}

static void
tex_get_box_origin(const struct ilo_texture *tex,
                   unsigned level, unsigned slice,
                   const struct pipe_box *box,
                   unsigned *mem_x, unsigned *mem_y)
{
   unsigned x, y;

   ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
   x += box->x;
   y += box->y;

   ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
}

static unsigned
tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
                   const struct pipe_box *box)
{
   unsigned mem_x, mem_y;

   tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);

   return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
}

static unsigned
tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
{
   return ilo_image_get_slice_stride(&tex->image, level);
}

static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *     "As shown in the tiling algorithm, the new address bit[6] should
    *      be:
    *
    *        Address bit[6] <= TiledAddr bit[6] XOR
    *                          TiledAddr bit[9] XOR
    *                          TiledAddr bit[10]"
    */
   return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
}

static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *     "As shown in the tiling algorithm, The new address bit[6] becomes:
    *
    *        Address bit[6] <= TiledAddr bit[6] XOR
    *                          TiledAddr bit[9]"
    */
   return addr ^ ((addr >> 3) & 0x40);
}
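
/*
 * For example, with address swizzling enabled, tiled offset 0x200 (bit 9
 * set) becomes 0x240 under both functions above, since bit 6 is flipped.
 * Offset 0x400 (bit 10 set) becomes 0x440 under X-tiling swizzling but is
 * left unchanged under Y-tiling swizzling.
 */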

static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that an
    * X-major tile has 8 rows and 32 OWord columns (512 bytes).  Tiles in
    * the tiled region are numbered in row-major order, starting from zero.
    * The tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
    *
    * OWords in that tile are also numbered in row-major order, starting
    * from zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    *           = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
    */
   unsigned tile, offset;

   tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
   offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);

   return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
}

static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
    * Y-major tile has 32 rows and 8 OWord columns (128 bytes).  Tiles in
    * the tiled region are numbered in row-major order, starting from zero.
    * The tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
    *
    * OWords in that tile are numbered in column-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    */
   unsigned tile, oword, offset;

   tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
   oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
   offset = tile << 12 | oword << 4 | (mem_x & 0xf);

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
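
/*
 * As a worked example of the X-major formulas above, take mem_x = 1037,
 * mem_y = 21 and a 2048-byte bo stride (tiles_per_row = 4):
 *
 *    tile   = (21 / 8) * 4 + (1037 / 512) = 10
 *    offset = 10 * 4096 + (21 % 8) * 512 + (1037 % 512)
 *           = 40960 + 2560 + 13 = 43533
 */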

static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
    * W-major tile has 8 8x8-block rows and 8 8x8-block columns.  Tiles in
    * the tiled region are numbered in row-major order, starting from zero.
    * The tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
    *
    * 8x8-blocks in that tile are numbered in column-major order, starting
    * from zero.  The 8x8-block number can thus be calculated as follows:
    *
    *    blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
    *
    * Each 8x8-block is divided into 4 4x4-blocks, in row-major order.  Each
    * 4x4-block is further divided into 4 2x2-blocks, also in row-major
    * order.  We have
    *
    *    blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
    *    blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
    *    blk1 = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
    */
   unsigned tile, blk8, blk4, blk2, blk1, offset;

   tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
   blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
   blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
   blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
   blk1 = ((mem_y     ) & 0x1) << 1 | ((mem_x     ) & 0x1);
   offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}

static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   return mem_y * tiles_per_row + mem_x;
}

typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
                                         unsigned tiles_per_row,
                                         bool swizzle);

static tex_tile_offset_func
tex_tile_choose_offset_func(const struct ilo_texture *tex,
                            unsigned *tiles_per_row)
{
   switch (tex->image.tiling) {
   default:
      assert(!"unknown tiling");
      /* fall through */
   case GEN6_TILING_NONE:
      *tiles_per_row = tex->image.bo_stride;
      return tex_tile_none_offset;
   case GEN6_TILING_X:
      *tiles_per_row = tex->image.bo_stride / 512;
      return tex_tile_x_offset;
   case GEN6_TILING_Y:
      *tiles_per_row = tex->image.bo_stride / 128;
      return tex_tile_y_offset;
   case GEN8_TILING_W:
      *tiles_per_row = tex->image.bo_stride / 64;
      return tex_tile_w_offset;
   }
}

static void *
tex_staging_sys_map_bo(struct ilo_texture *tex,
                       bool for_read_back,
                       bool linear_view)
{
   const struct ilo_screen *is = ilo_screen(tex->base.screen);
   const bool prefer_cpu = (is->dev.has_llc || for_read_back);
   void *ptr;

   if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
                      !linear_view))
      ptr = intel_bo_map(tex->vma.bo, !for_read_back);
   else
      ptr = intel_bo_map_gtt(tex->vma.bo);

   if (ptr)
      ptr = (void *) ((char *) ptr + tex->vma.bo_offset);

   return ptr;
}

static void
tex_staging_sys_unmap_bo(struct ilo_texture *tex)
{
   intel_bo_unmap(tex->vma.bo);
}
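
/*
 * Read the depth (and separate stencil, if any) data of the transfer box
 * back into the staging system buffer, untiling it with the offset functions
 * above and interleaving the stencil bytes into the packed Z/S format
 * expected by the caller.
 */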
static bool
tex_staging_sys_zs_read(struct ilo_texture *tex,
                        const struct ilo_transfer *xfer)
{
   const struct ilo_screen *is = ilo_screen(tex->base.screen);
   const bool swizzle = is->dev.has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   const uint8_t *src;
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   src = tex_staging_sys_map_bo(tex, true, false);
   if (!src)
      return false;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->image.block_width == 1 && tex->image.block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      const uint8_t *s8_src;
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int dst_cpp, dst_s8_pos, src_cpp_used;

      s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
      if (!s8_src) {
         tex_staging_sys_unmap_bo(tex);
         return false;
      }

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);

         dst_cpp = 4;
         dst_s8_pos = 3;
         src_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);

         dst_cpp = 8;
         dst_s8_pos = 4;
         src_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         dst = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(d, src + offset, src_cpp_used);
               d[dst_s8_pos] = s8_src[s8_offset];

               d += dst_cpp;
               x += tex->image.block_size;
               s8_x++;
            }

            dst += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }

      tex_staging_sys_unmap_bo(s8_tex);
   }
   else {
      assert(tex->image_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         dst = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               *d = src[offset];

               d++;
               x++;
            }

            dst += xfer->base.stride;
            mem_y++;
         }
      }
   }

   tex_staging_sys_unmap_bo(tex);

   return true;
}

static bool
tex_staging_sys_zs_write(struct ilo_texture *tex,
                         const struct ilo_transfer *xfer)
{
   const struct ilo_screen *is = ilo_screen(tex->base.screen);
   const bool swizzle = is->dev.has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   uint8_t *dst;
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   dst = tex_staging_sys_map_bo(tex, false, false);
   if (!dst)
      return false;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->image.block_width == 1 && tex->image.block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      uint8_t *s8_dst;
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int src_cpp, src_s8_pos, dst_cpp_used;

      s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
      if (!s8_dst) {
         /* unmap the depth bo mapped above before bailing out */
         tex_staging_sys_unmap_bo(tex);
         return false;
      }

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);

         src_cpp = 4;
         src_s8_pos = 3;
         dst_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);

         src_cpp = 8;
         src_s8_pos = 4;
         dst_cpp_used = 4;
      }
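
      /*
       * Walk the transfer box slice by slice and row by row, splitting each
       * packed Z/S texel from the staging system buffer: the depth bytes go
       * into the (tiled) depth bo and the stencil byte into the separate S8
       * bo.
       */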
      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         src = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(dst + offset, s, dst_cpp_used);
               s8_dst[s8_offset] = s[src_s8_pos];

               s += src_cpp;
               x += tex->image.block_size;
               s8_x++;
            }

            src += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }

      tex_staging_sys_unmap_bo(s8_tex);
   }
   else {
      assert(tex->image_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         src = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               dst[offset] = *s;

               s++;
               x++;
            }

            src += xfer->base.stride;
            mem_y++;
         }
      }
   }

   tex_staging_sys_unmap_bo(tex);

   return true;
}

static bool
tex_staging_sys_convert_write(struct ilo_texture *tex,
                              const struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   unsigned dst_slice_stride;
   void *dst;
   int slice;

   dst = tex_staging_sys_map_bo(tex, false, true);
   if (!dst)
      return false;

   dst += tex_get_box_offset(tex, xfer->base.level, box);

   /* slice stride is not always available */
   if (box->depth > 1)
      dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      dst_slice_stride = 0;

   if (unlikely(tex->image_format == tex->base.format)) {
      util_copy_box(dst, tex->image_format, tex->image.bo_stride,
            dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
            xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
            0, 0, 0);

      tex_staging_sys_unmap_bo(tex);

      return true;
   }

   switch (tex->base.format) {
   case PIPE_FORMAT_ETC1_RGB8:
      assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM);

      for (slice = 0; slice < box->depth; slice++) {
         const void *src =
            xfer->staging.sys + xfer->base.layer_stride * slice;

         util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
               tex->image.bo_stride, src, xfer->base.stride,
               box->width, box->height);

         dst += dst_slice_stride;
      }
      break;
   default:
      assert(!"unable to convert the staging data");
      break;
   }

   tex_staging_sys_unmap_bo(tex);

   return true;
}

static void
tex_staging_sys_writeback(struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);
   bool success;

   if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
      return;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      success = tex_staging_sys_convert_write(tex, xfer);
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_zs_write(tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   if (!success)
      ilo_err("failed to map resource for moving staging data\n");
}

static bool
tex_staging_sys_readback(struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);
   bool read_back = false, success;

   /* see if we need to read the resource back */
   if (xfer->base.usage & PIPE_TRANSFER_READ) {
      read_back = true;
   }
   else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
      const unsigned discard_flags =
         (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);

      if (!(xfer->base.usage & discard_flags))
         read_back = true;
   }

   if (!read_back)
      return true;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      assert(!"no on-the-fly format conversion for mapping");
      success = false;
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_zs_read(tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   return success;
}

static void *
tex_map(struct ilo_transfer *xfer)
{
   void *ptr;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_GTT_ASYNC:
      ptr = xfer_map(xfer);
      if (ptr) {
         const struct ilo_texture *tex = ilo_texture(xfer->base.resource);

         ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);

         /* stride is for a block row, not a texel row */
         xfer->base.stride = tex->image.bo_stride;
         /* note that slice stride is not always available */
         xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
            tex_get_slice_stride(tex, xfer->base.level) : 0;
      }
      break;
   case ILO_TRANSFER_MAP_STAGING:
      ptr = xfer_map(xfer);
      if (ptr) {
         const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
         xfer->base.stride = staging->image.bo_stride;
         xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
      }
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
         ptr = xfer_map(xfer);
      else
         ptr = NULL;
      break;
   default:
      assert(!"unknown mapping method");
      ptr = NULL;
      break;
   }

   return ptr;
}

static void *
buf_map(struct ilo_transfer *xfer)
{
   void *ptr;

   ptr = xfer_map(xfer);
   if (!ptr)
      return NULL;

   if (xfer->method != ILO_TRANSFER_MAP_STAGING)
      ptr += xfer->base.box.x;

   xfer->base.stride = 0;
   xfer->base.layer_stride = 0;

   assert(xfer->base.level == 0);
   assert(xfer->base.box.y == 0);
   assert(xfer->base.box.z == 0);
   assert(xfer->base.box.height == 1);
   assert(xfer->base.box.depth == 1);

   return ptr;
}
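
/*
 * Copy the staging resource back to the real resource.  For buffers, the
 * staging bo was allocated with (box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT)
 * bytes of padding at the front (see xfer_alloc_staging_res()), so the
 * source box has to be offset by the same amount here.
 */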
static void
copy_staging_resource(struct ilo_context *ilo,
                      struct ilo_transfer *xfer,
                      const struct pipe_box *box)
{
   const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
      xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
   struct pipe_box modified_box;

   assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);

   if (!box) {
      u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
               xfer->base.box.depth, &modified_box);
      box = &modified_box;
   }
   else if (pad_x) {
      modified_box = *box;
      modified_box.x += pad_x;
      box = &modified_box;
   }

   ilo_blitter_blt_copy_resource(ilo->blitter,
         xfer->base.resource, xfer->base.level,
         xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
         xfer->staging.res, 0, box);
}

static bool
is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
{
   const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);

   if (need_submit)
      *need_submit = referenced;

   if (referenced)
      return true;

   return intel_bo_is_busy(bo);
}

/**
 * Choose the best mapping method, depending on the transfer usage and
 * whether the bo is busy.
 */
static bool
choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct pipe_resource *res = xfer->base.resource;
   bool need_submit;

   if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
      return false;

   /* see if we can avoid blocking */
   if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) {
      bool resource_renamed;

      if (!xfer_unblock(xfer, &resource_renamed)) {
         if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* submit to make the bo really busy so that map() correctly blocks */
         if (need_submit)
            ilo_cp_submit(ilo->cp, "syncing for transfers");
      }

      if (resource_renamed)
         ilo_state_vector_resource_renamed(&ilo->state_vector, res);
   }

   return true;
}
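
/*
 * Write data directly into the buffer with pwrite().  When the bo is busy,
 * blocking is avoided by renaming the bo (when the whole resource may be
 * discarded) or by writing to a temporary staging resource and doing a
 * pipelined copy.
 */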
static void
buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
           unsigned usage, int offset, int size, const void *data)
{
   struct ilo_buffer_resource *buf = ilo_buffer_resource(res);
   bool need_submit;

   /* see if we can avoid blocking */
   if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) {
      bool unblocked = false;

      if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
          ilo_resource_rename_bo(res)) {
         ilo_state_vector_resource_renamed(&ilo->state_vector, res);
         unblocked = true;
      }
      else {
         struct pipe_resource templ, *staging;

         /*
          * allocate a staging buffer to hold the data and pipelined-copy it
          * over
          */
         templ = *res;
         templ.width0 = size;
         templ.usage = PIPE_USAGE_STAGING;
         templ.bind = 0;
         staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
         if (staging) {
            const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging);
            struct pipe_box staging_box;

            /* offset by staging_vma->bo_offset for pwrite */
            intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset,
                            size, data);

            u_box_1d(0, size, &staging_box);
            ilo_blitter_blt_copy_resource(ilo->blitter,
                  res, 0, offset, 0, 0,
                  staging, 0, &staging_box);

            pipe_resource_reference(&staging, NULL);

            return;
         }
      }

      /* submit to make the bo really busy so that pwrite() correctly blocks */
      if (!unblocked && need_submit)
         ilo_cp_submit(ilo->cp, "syncing for pwrites");
   }

   /* offset by buf->vma.bo_offset for pwrite */
   intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data);
}

static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);

   /*
    * The staging resource is mapped persistently and coherently.  We can
    * copy without unmapping.
    */
   if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
       (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
      copy_staging_resource(ilo, xfer, box);
}

static void
ilo_transfer_unmap(struct pipe_context *pipe,
                   struct pipe_transfer *transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);

   xfer_unmap(xfer);

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_STAGING:
      if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
         copy_staging_resource(ilo, xfer, NULL);
      pipe_resource_reference(&xfer->staging.res, NULL);
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      tex_staging_sys_writeback(xfer);
      align_free(xfer->staging.sys);
      break;
   default:
      break;
   }

   pipe_resource_reference(&xfer->base.resource, NULL);

   slab_free_st(&ilo->transfer_mempool, xfer);
}

static void *
ilo_transfer_map(struct pipe_context *pipe,
                 struct pipe_resource *res,
                 unsigned level,
                 unsigned usage,
                 const struct pipe_box *box,
                 struct pipe_transfer **transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer;
   void *ptr;

   /* note that xfer is not zero'd */
   xfer = slab_alloc_st(&ilo->transfer_mempool);
   if (!xfer) {
      *transfer = NULL;
      return NULL;
   }

   xfer->base.resource = NULL;
   pipe_resource_reference(&xfer->base.resource, res);
   xfer->base.level = level;
   xfer->base.usage = usage;
   xfer->base.box = *box;

   ilo_blit_resolve_transfer(ilo, &xfer->base);

   if (choose_transfer_method(ilo, xfer)) {
      if (res->target == PIPE_BUFFER)
         ptr = buf_map(xfer);
      else
         ptr = tex_map(xfer);
   }
   else {
      ptr = NULL;
   }

   if (!ptr) {
      pipe_resource_reference(&xfer->base.resource, NULL);
      slab_free_st(&ilo->transfer_mempool, xfer);
      *transfer = NULL;
      return NULL;
   }

   *transfer = &xfer->base;

   return ptr;
}

static void
ilo_buffer_subdata(struct pipe_context *pipe,
                   struct pipe_resource *resource,
                   unsigned usage, unsigned offset,
                   unsigned size, const void *data)
{
   if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
      u_default_buffer_subdata(pipe, resource, usage, offset, size, data);
   else
      buf_pwrite(ilo_context(pipe), resource, usage, offset, size, data);
}

/**
 * Initialize transfer-related functions.
 */
void
ilo_init_transfer_functions(struct ilo_context *ilo)
{
   ilo->base.transfer_map = ilo_transfer_map;
   ilo->base.transfer_flush_region = ilo_transfer_flush_region;
   ilo->base.transfer_unmap = ilo_transfer_unmap;
   ilo->base.buffer_subdata = ilo_buffer_subdata;
   ilo->base.texture_subdata = u_default_texture_subdata;
}