/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <GL/gl.h>
#include <GL/internal/dri_interface.h>

#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_resolve_map.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_fbo.h"

#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_state.h"

#include "main/enums.h"
#include "main/fbobject.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/texcompress_etc.h"
#include "main/teximage.h"
#include "main/streaming-load-memcpy.h"
#include "x86/common_x86_asm.h"

#define FILE_DEBUG_FLAG DEBUG_MIPTREE

static void *intel_miptree_map_raw(struct brw_context *brw,
                                   struct intel_mipmap_tree *mt);

static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);

static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        GLuint num_samples);

/**
 * Determine which MSAA layout should be used by the MSAA surface being
 * created, based on the chip generation and the surface type.
 */
static enum intel_msaa_layout
compute_msaa_layout(struct brw_context *brw, mesa_format format,
                    enum intel_aux_disable aux_disable)
{
   /* Prior to Gen7, all MSAA surfaces used IMS layout. */
   if (brw->gen < 7)
      return INTEL_MSAA_LAYOUT_IMS;

   /* In Gen7, IMS layout is only used for depth and stencil buffers. */
   switch (_mesa_get_format_base_format(format)) {
   case GL_DEPTH_COMPONENT:
   case GL_STENCIL_INDEX:
   case GL_DEPTH_STENCIL:
      return INTEL_MSAA_LAYOUT_IMS;
   default:
      /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
       *
       *   This field must be set to 0 for all SINT MSRTs when all RT channels
       *   are not written
       *
       * In practice this means that we have to disable MCS for all signed
       * integer MSAA buffers. The alternative, to disable MCS only when one
       * of the render target channels is disabled, is impractical because it
       * would require converting between CMS and UMS MSAA layouts on the fly,
       * which is expensive.
       */
      if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
         return INTEL_MSAA_LAYOUT_UMS;
      } else if (aux_disable & INTEL_AUX_DISABLE_MCS) {
         /* We can't use the CMS layout because it uses an aux buffer, the MCS
          * buffer. So fall back to UMS, which is identical to CMS without the
          * MCS.
          */
         return INTEL_MSAA_LAYOUT_UMS;
      } else {
         return INTEL_MSAA_LAYOUT_CMS;
      }
   }
}

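/* For illustration (added here, not part of the original source): on Gen7,
 * compute_msaa_layout() picks IMS for a multisampled depth format such as
 * MESA_FORMAT_Z24_UNORM_X8_UINT, UMS for a signed-integer color format such
 * as MESA_FORMAT_R_SINT32 (per the "MCS Enable" restriction quoted above),
 * and CMS for an ordinary color format such as MESA_FORMAT_R8G8B8A8_UNORM
 * when MCS has not been disabled.
 */
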
bool
intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
                                   unsigned tiling)
{
   /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
    * Target(s)", beneath the "Fast Color Clear" bullet (p326):
    *
    *     - Support is limited to tiled render targets.
    *
    * Gen9 changes the restriction to Y-tile only.
    */
   if (brw->gen >= 9)
      return tiling == I915_TILING_Y;
   else if (brw->gen >= 7)
      return tiling != I915_TILING_NONE;
   else
      return false;
}

/**
 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 * can be used. This doesn't (and should not) inspect any of the properties of
 * the miptree's BO.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p326):
 *
 *     - Support is for non-mip-mapped and non-array surface types only.
 *
 * And then later, on p327:
 *
 *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 *       64bpp, and 128bpp.
 *
 * From the Skylake documentation, it is made clear that X-tiling is no longer
 * supported:
 *
 *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
 *       non-MSRTs only.
 */
bool
intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
                                           const struct intel_mipmap_tree *mt)
{
   /* MCS support does not exist prior to Gen7 */
   if (brw->gen < 7)
      return false;

   if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
      return false;

   /* This function applies only to non-multisampled render targets. */
   if (mt->num_samples > 1)
      return false;

   /* MCS is only supported for color buffers */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_DEPTH_COMPONENT:
   case GL_DEPTH_STENCIL:
   case GL_STENCIL_INDEX:
      return false;
   }

   if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
      return false;

   const bool mip_mapped = mt->first_level != 0 || mt->last_level != 0;
   const bool arrayed = mt->physical_depth0 != 1;

   if (arrayed) {
      /* Multisample surfaces with the CMS layout are not layered surfaces,
       * yet still have physical_depth0 > 1. Assert that we don't
       * accidentally reject a multisampled surface here. We should have
       * rejected it earlier by explicitly checking the sample count.
       */
      assert(mt->num_samples <= 1);
   }

   /* Handle the hardware restrictions...
    *
    * All GENs have the following restriction: "MCS buffer for non-MSRT is
    * supported only for RT formats 32bpp, 64bpp, and 128bpp."
    *
    * From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652: (Color Clear of
    * Non-MultiSampler Render Target Restrictions) Support is for
    * non-mip-mapped and non-array surface types only.
    *
    * From the BDW PRM Volume 7: 3D-Media-GPGPU, page 649: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 256 and Vertical Alignment = 128.
    *
    * From the SKL PRM Volume 7: 3D-Media-GPGPU, page 632: (Color Clear of
    * Non-MultiSampler Render Target Restriction). Mip-mapped and arrayed
    * surfaces are supported with MCS buffer layout with these alignments in
    * the RT space: Horizontal Alignment = 128 and Vertical Alignment = 64.
    */
   if (brw->gen < 8 && (mip_mapped || arrayed))
      return false;

   /* There's no point in using an MCS buffer if the surface isn't in a
    * renderable format.
    */
   if (!brw->format_supported_as_render_target[mt->format])
      return false;

   if (brw->gen >= 9) {
      mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
      const uint32_t brw_format = brw_format_for_mesa_format(linear_format);
      return isl_format_supports_lossless_compression(&brw->screen->devinfo,
                                                      brw_format);
   } else
      return true;
}

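/* For illustration (added here, not part of the original source): the cpp
 * check above encodes the "32bpp, 64bpp, and 128bpp" rule, so a 4-byte
 * format such as MESA_FORMAT_B8G8R8A8_UNORM (cpp == 4) can qualify for a
 * non-MSRT MCS, while a 2-byte format such as MESA_FORMAT_B5G6R5_UNORM
 * (cpp == 2) never does.
 */
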
/* On Gen9 support for color buffer compression was extended to single
 * sampled surfaces. This is a helper considering both auxiliary buffer
 * type and number of samples telling if the given miptree represents
 * the new single sampled case - also called lossless compression.
 */
bool
intel_miptree_is_lossless_compressed(const struct brw_context *brw,
                                     const struct intel_mipmap_tree *mt)
{
   /* Only available from Gen9 onwards. */
   if (brw->gen < 9)
      return false;

   /* Compression always requires an auxiliary buffer. */
   if (!mt->mcs_buf)
      return false;

   /* Single-sample compression is represented by re-using the MSAA
    * compression layout type: "Compressed Multisampled Surfaces".
    */
   if (mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS)
      return false;

   /* And finally distinguish between the msaa and single sample case. */
   return mt->num_samples <= 1;
}

bool
intel_miptree_supports_lossless_compressed(struct brw_context *brw,
                                           const struct intel_mipmap_tree *mt)
{
   /* For now compression is only enabled for integer formats even though
    * there are also supported floating point formats. This is a heuristic
    * decision based on current public benchmarks: in none of the measured
    * cases did these formats provide any improvement, and a few cases were
    * seen to regress. Hence these are left to be enabled in the future when
    * they are known to improve things.
    */
   if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
      return false;

   /* The fast clear mechanism and lossless compression go hand in hand. */
   if (!intel_miptree_supports_non_msrt_fast_clear(brw, mt))
      return false;

   /* Fast clear can also be used to clear srgb surfaces by using the
    * equivalent linear format. This trick, however, can't be extended to be
    * used with lossless compression and therefore a check is needed to see
    * if the format really is linear.
    */
   return _mesa_get_srgb_format_linear(mt->format) == mt->format;
}

/**
 * Determine the depth format corresponding to a depth+stencil format,
 * for separate stencil.
 */
mesa_format
intel_depth_format_for_depthstencil_format(mesa_format format) {
   switch (format) {
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      return MESA_FORMAT_Z24_UNORM_X8_UINT;
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return MESA_FORMAT_Z_FLOAT32;
   default:
      return format;
   }
}

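/* For illustration (added here, not part of the original source): with
 * separate stencil, a MESA_FORMAT_Z24_UNORM_S8_UINT texture is stored as a
 * MESA_FORMAT_Z24_UNORM_X8_UINT depth miptree plus a MESA_FORMAT_S_UINT8
 * stencil miptree; the function above performs exactly that first mapping.
 */
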
/**
 * @param for_bo Indicates that the caller is
 *        intel_miptree_create_for_bo(). If true, then do not create
 *        \c stencil_mt.
 */
static struct intel_mipmap_tree *
intel_miptree_create_layout(struct brw_context *brw,
                            GLenum target,
                            mesa_format format,
                            GLuint first_level,
                            GLuint last_level,
                            GLuint width0,
                            GLuint height0,
                            GLuint depth0,
                            GLuint num_samples,
                            uint32_t layout_flags)
{
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
   if (!mt)
      return NULL;

   DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __func__,
       _mesa_enum_to_string(target),
       _mesa_get_format_name(format),
       first_level, last_level, depth0, mt);

   if (target == GL_TEXTURE_1D_ARRAY)
      assert(height0 == 1);

   mt->target = target;
   mt->format = format;
   mt->first_level = first_level;
   mt->last_level = last_level;
   mt->logical_width0 = width0;
   mt->logical_height0 = height0;
   mt->logical_depth0 = depth0;
   mt->aux_disable = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0 ?
      INTEL_AUX_DISABLE_ALL : INTEL_AUX_DISABLE_NONE;
   mt->aux_disable |= INTEL_AUX_DISABLE_CCS;
   mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != 0;
   exec_list_make_empty(&mt->hiz_map);
   exec_list_make_empty(&mt->color_resolve_map);
   mt->cpp = _mesa_get_format_bytes(format);
   mt->num_samples = num_samples;
   mt->compressed = _mesa_is_format_compressed(format);
   mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
   mt->refcount = 1;

   int depth_multiply = 1;
   if (num_samples > 1) {
      /* Adjust width/height/depth for MSAA */
      mt->msaa_layout = compute_msaa_layout(brw, format, mt->aux_disable);
      if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
         /* From the Ivybridge PRM, Volume 1, Part 1, page 108:
          * "If the surface is multisampled and it is a depth or stencil
          *  surface or Multisampled Surface StorageFormat in SURFACE_STATE is
          *  MSFMT_DEPTH_STENCIL, WL and HL must be adjusted as follows before
          *  proceeding:
          *
          *  +----------------------------------------------------------------+
          *  | Num Multisamples |        W_l =         |        H_l =         |
          *  +----------------------------------------------------------------+
          *  |         2        | ceiling(W_l / 2) * 4 | H_l (no adjustment)  |
          *  |         4        | ceiling(W_l / 2) * 4 | ceiling(H_l / 2) * 4 |
          *  |         8        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 4 |
          *  |        16        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 8 |
          *  +----------------------------------------------------------------+
          * "
          *
          * Note that MSFMT_DEPTH_STENCIL just means the IMS (interleaved)
          * format rather than UMS/CMS (array slices). The Sandybridge PRM,
          * Volume 1, Part 1, Page 111 has the same formula for 4x MSAA.
          *
          * Another more complicated explanation for these adjustments comes
          * from the Sandybridge PRM, volume 4, part 1, page 31:
          *
          *  "Any of the other messages (sample*, LOD, load4) used with a
          *   (4x) multisampled surface will in-effect sample a surface with
          *   double the height and width as that indicated in the surface
          *   state. Each pixel position on the original-sized surface is
          *   replaced with a 2x2 of samples with the following arrangement:
          *
          *      sample 0 sample 2
          *      sample 1 sample 3"
          *
          * Thus, when sampling from a multisampled texture, it behaves as
          * though the layout in memory for (x,y,sample) is:
          *
          *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
          *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
          *
          *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
          *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
          *
          * However, the actual layout of multisampled data in memory is:
          *
          *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
          *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
          *
          *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
          *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
          *
          * This pattern repeats for each 2x2 pixel block.
          *
          * As a result, when calculating the size of our 4-sample buffer for
          * an odd width or height, we have to align before scaling up because
          * sample 3 is in that bottom right 2x2 block.
          */
         switch (num_samples) {
         case 2:
            assert(brw->gen >= 8);
            width0 = ALIGN(width0, 2) * 2;
            height0 = ALIGN(height0, 2);
            break;
         case 4:
            width0 = ALIGN(width0, 2) * 2;
            height0 = ALIGN(height0, 2) * 2;
            break;
         case 8:
            width0 = ALIGN(width0, 2) * 4;
            height0 = ALIGN(height0, 2) * 2;
            break;
         case 16:
            width0 = ALIGN(width0, 2) * 4;
            height0 = ALIGN(height0, 2) * 4;
            break;
         default:
            /* num_samples should already have been quantized to 0, 1, 2, 4, 8
             * or 16.
             */
            unreachable("not reached");
         }
      } else {
         /* Non-interleaved */
         depth_multiply = num_samples;
         depth0 *= depth_multiply;
      }
   }

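   /* Worked example (added here, not in the original source): for a 5x3 IMS
    * surface with num_samples == 4, the switch above yields
    * width0 = ALIGN(5, 2) * 2 = 12 and height0 = ALIGN(3, 2) * 2 = 8, i.e.
    * the dimensions are aligned to the 2x2 sample block before doubling.
    */
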
   /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 can
    * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces on
    * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is still
    * used on Gen8 to make it pick a qpitch value which doesn't include space
    * for the mipmaps. On Gen9 this is not necessary because it will
    * automatically pick a packed qpitch value whenever mt->first_level ==
    * mt->last_level.
    * TODO: can we use it elsewhere?
    * TODO: also disable this on Gen8 and pick the qpitch value like Gen9
    */
   if (brw->gen >= 9) {
      mt->array_layout = ALL_LOD_IN_EACH_SLICE;
   } else {
      switch (mt->msaa_layout) {
      case INTEL_MSAA_LAYOUT_NONE:
      case INTEL_MSAA_LAYOUT_IMS:
         mt->array_layout = ALL_LOD_IN_EACH_SLICE;
         break;
      case INTEL_MSAA_LAYOUT_UMS:
      case INTEL_MSAA_LAYOUT_CMS:
         mt->array_layout = ALL_SLICES_AT_EACH_LOD;
         break;
      }
   }

   if (target == GL_TEXTURE_CUBE_MAP)
      assert(depth0 == 6 * depth_multiply);

   mt->physical_width0 = width0;
   mt->physical_height0 = height0;
   mt->physical_depth0 = depth0;

   if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
       _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
       (brw->must_use_separate_stencil ||
        (brw->has_separate_stencil &&
         intel_miptree_wants_hiz_buffer(brw, mt)))) {
      uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
      if (brw->gen == 6) {
         stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD |
                          MIPTREE_LAYOUT_TILING_ANY;
      }

      mt->stencil_mt = intel_miptree_create(brw,
                                            mt->target,
                                            MESA_FORMAT_S_UINT8,
                                            mt->first_level,
                                            mt->last_level,
                                            mt->logical_width0,
                                            mt->logical_height0,
                                            mt->logical_depth0,
                                            num_samples,
                                            stencil_flags);

      if (!mt->stencil_mt) {
         intel_miptree_release(&mt);
         return NULL;
      }
      mt->stencil_mt->r8stencil_needs_update = true;

      /* Fix up the Z miptree format for how we're splitting out separate
       * stencil. Gen7 expects there to be no stencil bits in its depth buffer.
       */
      mt->format = intel_depth_format_for_depthstencil_format(mt->format);
      mt->cpp = 4;

      if (format == mt->format) {
         _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
                       _mesa_get_format_name(mt->format));
      }
   }

   if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
      mt->array_layout = ALL_SLICES_AT_EACH_LOD;

   /*
    * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers which are
    * multisampled or have an AUX buffer attached to them.
    *
    *  GEN  |    MSRT        | AUX_CCS_* or AUX_MCS
    *  -------------------------------------------
    *   9   |  HALIGN_16     |    HALIGN_16
    *   8   |  HALIGN_ANY    |    HALIGN_16
    *   7   |      ?         |        ?
    *   6   |      ?         |        ?
    */
   if (intel_miptree_supports_non_msrt_fast_clear(brw, mt)) {
      if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1))
         layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
   } else if (brw->gen >= 9 && num_samples > 1) {
      layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
   } else {
      const UNUSED bool is_lossless_compressed_aux =
         brw->gen >= 9 && num_samples == 1 &&
         mt->format == MESA_FORMAT_R_UINT32;

      /* For now, nothing else has this requirement */
      assert(is_lossless_compressed_aux ||
             (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
   }

   brw_miptree_layout(brw, mt, layout_flags);

   if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
      assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);

   return mt;
}

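/* For illustration (added here, not part of the original source): per the
 * table above, a single-sampled Gen8 render target that qualifies for fast
 * clears gets MIPTREE_LAYOUT_FORCE_HALIGN16, a multisampled Gen8 target may
 * keep any horizontal alignment, and on Gen9 both the fast-clear-capable and
 * the multisampled cases force HALIGN_16.
 */
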
/**
 * Choose an appropriate uncompressed format for a requested
 * compressed format, if unsupported.
 */
mesa_format
intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
{
   /* No need to lower ETC formats on these platforms,
    * they are supported natively.
    */
   if (brw->gen >= 8 || brw->is_baytrail)
      return format;

   switch (format) {
   case MESA_FORMAT_ETC1_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_SRGB8:
   case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_B8G8R8A8_SRGB;
   case MESA_FORMAT_ETC2_RGBA8_EAC:
   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_R8G8B8A8_UNORM;
   case MESA_FORMAT_ETC2_R11_EAC:
      return MESA_FORMAT_R_UNORM16;
   case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
      return MESA_FORMAT_R_SNORM16;
   case MESA_FORMAT_ETC2_RG11_EAC:
      return MESA_FORMAT_R16G16_UNORM;
   case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
      return MESA_FORMAT_R16G16_SNORM;
   default:
      /* Non ETC1 / ETC2 format */
      return format;
   }
}

/* This function computes the Yf/Ys tiled bo size, alignment and pitch. */
static unsigned long
intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
                        unsigned long *pitch)
{
   uint32_t tile_width, tile_height;
   unsigned long stride, size, aligned_y;

   assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
   intel_get_tile_dims(mt->tiling, mt->tr_mode, mt->cpp,
                       &tile_width, &tile_height);

   aligned_y = ALIGN(mt->total_height, tile_height);
   stride = mt->total_width * mt->cpp;
   stride = ALIGN(stride, tile_width);
   size = stride * aligned_y;

   if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YF) {
      assert(size % 4096 == 0);
      *alignment = 4096;
   } else {
      assert(size % (64 * 1024) == 0);
      *alignment = 64 * 1024;
   }
   *pitch = stride;
   return size;
}

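/* For illustration (added here, not part of the original source): a
 * TRMODE_YF BO is sized and aligned in 4 KB tiles while TRMODE_YS uses
 * 64 KB tiles. With cpp == 4, intel_get_tile_dims() gives a 128-byte x
 * 32-row tile for Yf (4 KB) and a 512-byte x 128-row tile for Ys (64 KB),
 * which is what the two alignment branches above rely on.
 */
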
static struct intel_mipmap_tree *
miptree_create(struct brw_context *brw,
               GLenum target,
               mesa_format format,
               GLuint first_level,
               GLuint last_level,
               GLuint width0,
               GLuint height0,
               GLuint depth0,
               GLuint num_samples,
               uint32_t layout_flags)
{
   struct intel_mipmap_tree *mt;
   mesa_format tex_format = format;
   mesa_format etc_format = MESA_FORMAT_NONE;
   uint32_t alloc_flags = 0;

   format = intel_lower_compressed_format(brw, format);

   etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;

   assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
   mt = intel_miptree_create_layout(brw, target, format,
                                    first_level, last_level, width0,
                                    height0, depth0, num_samples,
                                    layout_flags);
   /*
    * pitch == 0 || height == 0  indicates the null texture
    */
   if (!mt || !mt->total_width || !mt->total_height) {
      intel_miptree_release(&mt);
      return NULL;
   }

   if (mt->tiling == (I915_TILING_Y | I915_TILING_X))
      mt->tiling = I915_TILING_Y;

   if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
      alloc_flags |= BO_ALLOC_FOR_RENDER;

   unsigned long pitch;
   mt->etc_format = etc_format;

   if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
      unsigned alignment = 0;
      unsigned long size;
      size = intel_get_yf_ys_bo_size(mt, &alignment, &pitch);
      assert(size);
      mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree",
                                             size, alignment);
   } else {
      if (format == MESA_FORMAT_S_UINT8) {
         /* Align to size of W tile, 64x64. */
         mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
                                           ALIGN(mt->total_width, 64),
                                           ALIGN(mt->total_height, 64),
                                           mt->cpp, &mt->tiling, &pitch,
                                           alloc_flags);
      } else {
         mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
                                           mt->total_width, mt->total_height,
                                           mt->cpp, &mt->tiling, &pitch,
                                           alloc_flags);
      }
   }

   mt->pitch = pitch;

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
                     GLenum target,
                     mesa_format format,
                     GLuint first_level,
                     GLuint last_level,
                     GLuint width0,
                     GLuint height0,
                     GLuint depth0,
                     GLuint num_samples,
                     uint32_t layout_flags)
{
   struct intel_mipmap_tree *mt = miptree_create(
                                     brw, target, format,
                                     first_level, last_level,
                                     width0, height0, depth0, num_samples,
                                     layout_flags);

   /* If the BO is too large to fit in the aperture, we need to use the
    * BLT engine to support it. Prior to Sandybridge, the BLT paths can't
    * handle Y-tiling, so we need to fall back to X.
    */
   if (brw->gen < 6 && mt->bo->size >= brw->max_gtt_map_object_size &&
       mt->tiling == I915_TILING_Y) {
      unsigned long pitch = mt->pitch;
      const uint32_t alloc_flags =
         (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) ?
         BO_ALLOC_FOR_RENDER : 0;
      perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
                 mt->total_width, mt->total_height);

      mt->tiling = I915_TILING_X;
      drm_intel_bo_unreference(mt->bo);
      mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
                                        mt->total_width, mt->total_height,
                                        mt->cpp, &mt->tiling, &pitch,
                                        alloc_flags);
      mt->pitch = pitch;
   }

   mt->offset = 0;

   if (!mt->bo) {
      intel_miptree_release(&mt);
      return NULL;
   }


   if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
      assert(mt->num_samples > 1);
      if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
         intel_miptree_release(&mt);
         return NULL;
      }
   }

   /* If this miptree is capable of supporting fast color clears, set
    * fast_clear_state appropriately to ensure that fast clears will occur.
    * Allocation of the MCS miptree will be deferred until the first fast
    * clear actually occurs or when a compressed single sampled buffer is
    * written by the GPU for the first time.
    */
   if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
       intel_miptree_supports_non_msrt_fast_clear(brw, mt)) {
      mt->aux_disable &= ~INTEL_AUX_DISABLE_CCS;
      assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);

      /* On Gen9+ clients are not currently capable of consuming compressed
       * single-sampled buffers. Disabling compression allows us to skip
       * resolves.
       */
      const bool lossless_compression_disabled = INTEL_DEBUG & DEBUG_NO_RBC;
      const bool is_lossless_compressed =
         unlikely(!lossless_compression_disabled) &&
         brw->gen >= 9 && !mt->is_scanout &&
         intel_miptree_supports_lossless_compressed(brw, mt);

      if (is_lossless_compressed) {
         intel_miptree_alloc_non_msrt_mcs(brw, mt, is_lossless_compressed);
      }
   }

   return mt;
}

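/* Usage sketch (added here, not in the original source): allocating a fully
 * mipmapped 64x64 2D RGBA8 texture, letting the layout code pick a tiling;
 * the flag combination is illustrative only and varies by caller:
 *
 *    struct intel_mipmap_tree *mt =
 *       intel_miptree_create(brw, GL_TEXTURE_2D, MESA_FORMAT_R8G8B8A8_UNORM,
 *                            0, 6, 64, 64, 1, 1,
 *                            MIPTREE_LAYOUT_TILING_ANY);
 *    if (!mt)
 *       return;
 */
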
struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
                            drm_intel_bo *bo,
                            mesa_format format,
                            uint32_t offset,
                            uint32_t width,
                            uint32_t height,
                            uint32_t depth,
                            int pitch,
                            uint32_t layout_flags)
{
   struct intel_mipmap_tree *mt;
   uint32_t tiling, swizzle;
   GLenum target;

   drm_intel_bo_get_tiling(bo, &tiling, &swizzle);

   /* Nothing will be able to use this miptree with the BO if the offset isn't
    * aligned.
    */
   if (tiling != I915_TILING_NONE)
      assert(offset % 4096 == 0);

   /* miptrees can't handle negative pitch. If you need flipping of images,
    * that's outside of the scope of the mt.
    */
   assert(pitch >= 0);

   target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;

   /* The BO already has a tiling format and we shouldn't confuse the lower
    * layers by making it try to find a tiling format again.
    */
   assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
   assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);

   layout_flags |= MIPTREE_LAYOUT_FOR_BO;
   mt = intel_miptree_create_layout(brw, target, format,
                                    0, 0,
                                    width, height, depth, 0,
                                    layout_flags);
   if (!mt)
      return NULL;

   drm_intel_bo_reference(bo);
   mt->bo = bo;
   mt->pitch = pitch;
   mt->offset = offset;
   mt->tiling = tiling;

   return mt;
}

/**
 * For a singlesample renderbuffer, this simply wraps the given BO with a
 * miptree.
 *
 * For a multisample renderbuffer, this wraps the window system's
 * (singlesample) BO with a singlesample miptree attached to the
 * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 * that will contain the actual rendering (which is lazily resolved to
 * irb->singlesample_mt).
 */
void
intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
                                         struct intel_renderbuffer *irb,
                                         drm_intel_bo *bo,
                                         uint32_t width, uint32_t height,
                                         uint32_t pitch)
{
   struct intel_mipmap_tree *singlesample_mt = NULL;
   struct intel_mipmap_tree *multisample_mt = NULL;
   struct gl_renderbuffer *rb = &irb->Base.Base;
   mesa_format format = rb->Format;
   int num_samples = rb->NumSamples;

   /* Only the front and back buffers, which are color buffers, are allocated
    * through the image loader.
    */
   assert(_mesa_get_format_base_format(format) == GL_RGB ||
          _mesa_get_format_base_format(format) == GL_RGBA);

   singlesample_mt = intel_miptree_create_for_bo(intel,
                                                 bo,
                                                 format,
                                                 0,
                                                 width,
                                                 height,
                                                 1,
                                                 pitch,
                                                 MIPTREE_LAYOUT_FOR_SCANOUT);
   if (!singlesample_mt)
      goto fail;

   /* If this miptree is capable of supporting fast color clears, set
    * mcs_state appropriately to ensure that fast clears will occur.
    * Allocation of the MCS miptree will be deferred until the first fast
    * clear actually occurs.
    */
   if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) &&
       intel_miptree_supports_non_msrt_fast_clear(intel, singlesample_mt)) {
      singlesample_mt->aux_disable &= ~INTEL_AUX_DISABLE_CCS;
   }

   if (num_samples == 0) {
      intel_miptree_release(&irb->mt);
      irb->mt = singlesample_mt;

      assert(!irb->singlesample_mt);
   } else {
      intel_miptree_release(&irb->singlesample_mt);
      irb->singlesample_mt = singlesample_mt;

      if (!irb->mt ||
          irb->mt->logical_width0 != width ||
          irb->mt->logical_height0 != height) {
         multisample_mt = intel_miptree_create_for_renderbuffer(intel,
                                                                format,
                                                                width,
                                                                height,
                                                                num_samples);
         if (!multisample_mt)
            goto fail;

         irb->need_downsample = false;
         intel_miptree_release(&irb->mt);
         irb->mt = multisample_mt;
      }
   }
   return;

fail:
   intel_miptree_release(&irb->singlesample_mt);
   intel_miptree_release(&irb->mt);
   return;
}

struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
                                      mesa_format format,
                                      uint32_t width,
                                      uint32_t height,
                                      uint32_t num_samples)
{
   struct intel_mipmap_tree *mt;
   uint32_t depth = 1;
   bool ok;
   GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
   const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
                                 MIPTREE_LAYOUT_TILING_ANY |
                                 MIPTREE_LAYOUT_FOR_SCANOUT;

   mt = intel_miptree_create(brw, target, format, 0, 0,
                             width, height, depth, num_samples,
                             layout_flags);
   if (!mt)
      goto fail;

   if (intel_miptree_wants_hiz_buffer(brw, mt)) {
      ok = intel_miptree_alloc_hiz(brw, mt);
      if (!ok)
         goto fail;
   }

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

void
intel_miptree_reference(struct intel_mipmap_tree **dst,
                        struct intel_mipmap_tree *src)
{
   if (*dst == src)
      return;

   intel_miptree_release(dst);

   if (src) {
      src->refcount++;
      DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
   }

   *dst = src;
}

static void
intel_miptree_hiz_buffer_free(struct intel_miptree_hiz_buffer *hiz_buf)
{
   if (hiz_buf == NULL)
      return;

   if (hiz_buf->mt)
      intel_miptree_release(&hiz_buf->mt);
   else
      drm_intel_bo_unreference(hiz_buf->aux_base.bo);

   free(hiz_buf);
}

void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
   if (!*mt)
      return;

   DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
   if (--(*mt)->refcount <= 0) {
      GLuint i;

      DBG("%s deleting %p\n", __func__, *mt);

      drm_intel_bo_unreference((*mt)->bo);
      intel_miptree_release(&(*mt)->stencil_mt);
      intel_miptree_release(&(*mt)->r8stencil_mt);
      intel_miptree_hiz_buffer_free((*mt)->hiz_buf);
      if ((*mt)->mcs_buf) {
         drm_intel_bo_unreference((*mt)->mcs_buf->bo);
         free((*mt)->mcs_buf);
      }
      intel_resolve_map_clear(&(*mt)->hiz_map);
      intel_resolve_map_clear(&(*mt)->color_resolve_map);

      intel_miptree_release(&(*mt)->plane[0]);
      intel_miptree_release(&(*mt)->plane[1]);

      for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
         free((*mt)->level[i].slice);
      }

      free(*mt);
   }
   *mt = NULL;
}


void
intel_get_image_dims(struct gl_texture_image *image,
                     int *width, int *height, int *depth)
{
   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
      /* For a 1D Array texture the OpenGL API will treat the image height as
       * the number of array slices. For Intel hardware, we treat the 1D array
       * as a 2D Array with a height of 1. So, here we want to swap image
       * height and depth.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = 1;
      *depth = image->Height;
      break;
   case GL_TEXTURE_CUBE_MAP:
      /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
       * though we really have 6 slices.
       */
      assert(image->Depth == 1);
      *width = image->Width;
      *height = image->Height;
      *depth = 6;
      break;
   default:
      *width = image->Width;
      *height = image->Height;
      *depth = image->Depth;
      break;
   }
}

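/* For illustration (added here, not part of the original source): a
 * GL_TEXTURE_1D_ARRAY image with Width == 128 and Height == 8 (8 slices)
 * comes back from intel_get_image_dims() as width 128, height 1, depth 8,
 * matching the 2D-array representation used by the hardware.
 */
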
/**
 * Can the image be pulled into a unified mipmap tree? This mirrors
 * the completeness test in a lot of ways.
 *
 * Not sure whether I want to pass gl_texture_image here.
 */
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
                          struct gl_texture_image *image)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   GLuint level = intelImage->base.Base.Level;
   int width, height, depth;

   /* glTexImage* chooses the texture object based on the target passed in,
    * and objects can't change targets over their lifetimes, so this should
    * be true.
    */
   assert(image->TexObject->Target == mt->target);

   mesa_format mt_format = mt->format;
   if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
   if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
   if (mt->etc_format != MESA_FORMAT_NONE)
      mt_format = mt->etc_format;

   if (image->TexFormat != mt_format)
      return false;

   intel_get_image_dims(image, &width, &height, &depth);

   if (mt->target == GL_TEXTURE_CUBE_MAP)
      depth = 6;

   int level_depth = mt->level[level].depth;
   if (mt->num_samples > 1) {
      switch (mt->msaa_layout) {
      case INTEL_MSAA_LAYOUT_NONE:
      case INTEL_MSAA_LAYOUT_IMS:
         break;
      case INTEL_MSAA_LAYOUT_UMS:
      case INTEL_MSAA_LAYOUT_CMS:
         level_depth /= mt->num_samples;
         break;
      }
   }

   /* Test image dimensions against the base level image adjusted for
    * minification. This will also catch images not present in the
    * tree, changed targets, etc.
    */
   if (width != minify(mt->logical_width0, level - mt->first_level) ||
       height != minify(mt->logical_height0, level - mt->first_level) ||
       depth != level_depth) {
      return false;
   }

   if (image->NumSamples != mt->num_samples)
      return false;

   return true;
}


void
intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
                             GLuint level,
                             GLuint x, GLuint y, GLuint d)
{
   mt->level[level].depth = d;
   mt->level[level].level_x = x;
   mt->level[level].level_y = y;

   DBG("%s level %d, depth %d, offset %d,%d\n", __func__,
       level, d, x, y);

   assert(mt->level[level].slice == NULL);

   mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
   mt->level[level].slice[0].x_offset = mt->level[level].level_x;
   mt->level[level].slice[0].y_offset = mt->level[level].level_y;
}


void
intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
                               GLuint level, GLuint img,
                               GLuint x, GLuint y)
{
   if (img == 0 && level == 0)
      assert(x == 0 && y == 0);

   assert(img < mt->level[level].depth);

   mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
   mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;

   DBG("%s level %d img %d pos %d,%d\n",
       __func__, level, img,
       mt->level[level].slice[img].x_offset,
       mt->level[level].slice[img].y_offset);
}

void
intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               GLuint *x, GLuint *y)
{
   assert(slice < mt->level[level].depth);

   *x = mt->level[level].slice[slice].x_offset;
   *y = mt->level[level].slice[slice].y_offset;
}


/**
 * This function computes the tile_w (in bytes) and tile_h (in rows) of
 * different tiling patterns. If the BO is untiled, tile_w is set to cpp
 * and tile_h is set to 1.
 */
void
intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
                    uint32_t *tile_w, uint32_t *tile_h)
{
   if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
      switch (tiling) {
      case I915_TILING_X:
         *tile_w = 512;
         *tile_h = 8;
         break;
      case I915_TILING_Y:
         *tile_w = 128;
         *tile_h = 32;
         break;
      case I915_TILING_NONE:
         *tile_w = cpp;
         *tile_h = 1;
         break;
      default:
         unreachable("not reached");
      }
   } else {
      uint32_t aspect_ratio = 1;
      assert(_mesa_is_pow_two(cpp));

      switch (cpp) {
      case 1:
         *tile_h = 64;
         break;
      case 2:
      case 4:
         *tile_h = 32;
         break;
      case 8:
      case 16:
         *tile_h = 16;
         break;
      default:
         unreachable("not reached");
      }

      if (cpp == 2 || cpp == 8)
         aspect_ratio = 2;

      if (tr_mode == INTEL_MIPTREE_TRMODE_YS)
         *tile_h *= 4;

      *tile_w = *tile_h * aspect_ratio * cpp;
   }
}

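/* For illustration (added here, not part of the original source): with
 * classic tiling an X tile spans 512 bytes x 8 rows and a Y tile 128 bytes x
 * 32 rows, both 4 KB. For Yf with cpp == 4 the code above yields
 * tile_h = 32 and tile_w = 32 * 1 * 4 = 128 bytes, again a 4 KB tile;
 * TRMODE_YS quadruples tile_h, giving the 64 KB tile assumed by
 * intel_get_yf_ys_bo_size().
 */
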
/**
 * This function computes masks that may be used to select the bits of the X
 * and Y coordinates that indicate the offset within a tile. If the BO is
 * untiled, the masks are set to 0.
 */
void
intel_get_tile_masks(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
                     uint32_t *mask_x, uint32_t *mask_y)
{
   uint32_t tile_w_bytes, tile_h;

   intel_get_tile_dims(tiling, tr_mode, cpp, &tile_w_bytes, &tile_h);

   *mask_x = tile_w_bytes / cpp - 1;
   *mask_y = tile_h - 1;
}

/**
 * Compute the offset (in bytes) from the start of the BO to the given x
 * and y coordinate. For tiled BOs, caller must ensure that x and y are
 * multiples of the tile size.
 */
uint32_t
intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
                                 uint32_t x, uint32_t y)
{
   int cpp = mt->cpp;
   uint32_t pitch = mt->pitch;
   uint32_t tiling = mt->tiling;

   switch (tiling) {
   default:
      unreachable("not reached");
   case I915_TILING_NONE:
      return y * pitch + x * cpp;
   case I915_TILING_X:
      assert((x % (512 / cpp)) == 0);
      assert((y % 8) == 0);
      return y * pitch + x / (512 / cpp) * 4096;
   case I915_TILING_Y:
      assert((x % (128 / cpp)) == 0);
      assert((y % 32) == 0);
      return y * pitch + x / (128 / cpp) * 4096;
   }
}

/**
 * Rendering with tiled buffers requires that the base address of the buffer
 * be aligned to a page boundary. For renderbuffers, and sometimes with
 * textures, we may want the surface to point at a texture image level that
 * isn't at a page boundary.
 *
 * This function returns an appropriately-aligned base offset
 * according to the tiling restrictions, plus any required x/y offset
 * from there.
 */
uint32_t
intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               uint32_t *tile_x,
                               uint32_t *tile_y)
{
   uint32_t x, y;
   uint32_t mask_x, mask_y;

   intel_get_tile_masks(mt->tiling, mt->tr_mode, mt->cpp, &mask_x, &mask_y);
   intel_miptree_get_image_offset(mt, level, slice, &x, &y);

   *tile_x = x & mask_x;
   *tile_y = y & mask_y;

   return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
}

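/* Worked example (added here, not in the original source): for a Y-tiled,
 * cpp == 4 miptree, mask_x == 31 and mask_y == 31. A slice at
 * (x, y) == (40, 70) therefore yields tile_x == 8, tile_y == 6, and the
 * aligned base offset is computed for (32, 64), which satisfies the
 * multiple-of-tile-size requirement of intel_miptree_get_aligned_offset().
 */
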
static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
                            struct intel_mipmap_tree *dst_mt,
                            struct intel_mipmap_tree *src_mt,
                            int level,
                            int slice,
                            int width,
                            int height)
{
   void *src, *dst;
   ptrdiff_t src_stride, dst_stride;
   int cpp = dst_mt->cpp;

   intel_miptree_map(brw, src_mt,
                     level, slice,
                     0, 0,
                     width, height,
                     GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
                     &src, &src_stride);

   intel_miptree_map(brw, dst_mt,
                     level, slice,
                     0, 0,
                     width, height,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
                     BRW_MAP_DIRECT_BIT,
                     &dst, &dst_stride);

   DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src, src_stride,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst, dst_stride,
       width, height);

   int row_size = cpp * width;
   if (src_stride == row_size &&
       dst_stride == row_size) {
      memcpy(dst, src, row_size * height);
   } else {
      for (int i = 0; i < height; i++) {
         memcpy(dst, src, row_size);
         dst += dst_stride;
         src += src_stride;
      }
   }

   intel_miptree_unmap(brw, dst_mt, level, slice);
   intel_miptree_unmap(brw, src_mt, level, slice);

   /* Don't forget to copy the stencil data over, too. We could have skipped
    * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
    * shuffling the two data sources in/out of temporary storage instead of
    * the direct mapping we get this way.
    */
   if (dst_mt->stencil_mt) {
      assert(src_mt->stencil_mt);
      intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
                                  level, slice, width, height);
   }
}

static void
intel_miptree_copy_slice(struct brw_context *brw,
                         struct intel_mipmap_tree *dst_mt,
                         struct intel_mipmap_tree *src_mt,
                         int level,
                         int face,
                         int depth)

{
   mesa_format format = src_mt->format;
   uint32_t width = minify(src_mt->physical_width0,
                           level - src_mt->first_level);
   uint32_t height = minify(src_mt->physical_height0,
                            level - src_mt->first_level);
   int slice;

   if (face > 0)
      slice = face;
   else
      slice = depth;

   assert(depth < src_mt->level[level].depth);
   assert(src_mt->format == dst_mt->format);

   if (dst_mt->compressed) {
      unsigned int i, j;
      _mesa_get_format_block_size(dst_mt->format, &i, &j);
      height = ALIGN_NPOT(height, j) / j;
      width = ALIGN_NPOT(width, i) / i;
   }

   /* If it's a packed depth/stencil buffer with separate stencil, the blit
    * below won't apply since we can't do the depth's Y tiling or the
    * stencil's W tiling in the blitter.
    */
   if (src_mt->stencil_mt) {
      intel_miptree_copy_slice_sw(brw,
                                  dst_mt, src_mt,
                                  level, slice,
                                  width, height);
      return;
   }

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->pitch,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->pitch,
       width, height);

   if (!intel_miptree_blit(brw,
                           src_mt, level, slice, 0, 0, false,
                           dst_mt, level, slice, 0, 0, false,
                           width, height, GL_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
                                  width, height);
   }
}

/**
 * Copies the image's current data to the given miptree, and associates that
 * miptree with the image.
 *
 * If \c invalidate is true, then the actual image data does not need to be
 * copied, but the image still needs to be associated to the new miptree (this
 * is set to true if we're about to clear the image).
 */
void
intel_miptree_copy_teximage(struct brw_context *brw,
                            struct intel_texture_image *intelImage,
                            struct intel_mipmap_tree *dst_mt,
                            bool invalidate)
{
   struct intel_mipmap_tree *src_mt = intelImage->mt;
   struct intel_texture_object *intel_obj =
      intel_texture_object(intelImage->base.Base.TexObject);
   int level = intelImage->base.Base.Level;
   int face = intelImage->base.Base.Face;

   GLuint depth;
   if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY)
      depth = intelImage->base.Base.Height;
   else
      depth = intelImage->base.Base.Depth;

   if (!invalidate) {
      for (int slice = 0; slice < depth; slice++) {
         intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
      }
   }

   intel_miptree_reference(&intelImage->mt, dst_mt);
   intel_obj->needs_validate = true;
}

static void
intel_miptree_init_mcs(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       int init_value)
{
   assert(mt->mcs_buf != NULL);

   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
    *
    *     When MCS buffer is enabled and bound to MSRT, it is required that it
    *     is cleared prior to any rendering.
    *
    * Since we don't use the MCS buffer for any purpose other than rendering,
    * it makes sense to just clear it immediately upon allocation.
    *
    * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
    */
   const int ret = brw_bo_map_gtt(brw, mt->mcs_buf->bo, "miptree");
   if (unlikely(ret)) {
      fprintf(stderr, "Failed to map mcs buffer into GTT\n");
      drm_intel_bo_unreference(mt->mcs_buf->bo);
      free(mt->mcs_buf);
      return;
   }
   void *data = mt->mcs_buf->bo->virtual;
   memset(data, init_value, mt->mcs_buf->size);
   drm_intel_bo_unmap(mt->mcs_buf->bo);
}

static struct intel_miptree_aux_buffer *
intel_mcs_miptree_buf_create(struct brw_context *brw,
                             struct intel_mipmap_tree *mt,
                             mesa_format format,
                             unsigned mcs_width,
                             unsigned mcs_height,
                             uint32_t layout_flags)
{
   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
   struct intel_mipmap_tree *temp_mt;

   if (!buf)
      return NULL;

   /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
    *
    *     "The MCS surface must be stored as Tile Y."
    */
   layout_flags |= MIPTREE_LAYOUT_TILING_Y;
   temp_mt = miptree_create(brw,
                            mt->target,
                            format,
                            mt->first_level,
                            mt->last_level,
                            mcs_width,
                            mcs_height,
                            mt->logical_depth0,
                            0 /* num_samples */,
                            layout_flags);
   if (!temp_mt) {
      free(buf);
      return NULL;
   }

   buf->bo = temp_mt->bo;
   buf->offset = temp_mt->offset;
   buf->size = temp_mt->total_height * temp_mt->pitch;
   buf->pitch = temp_mt->pitch;
   buf->qpitch = temp_mt->qpitch;

   /* Just hang on to the BO which backs the AUX buffer; the rest of the
    * miptree structure should go away. We use miptree create simply as a
    * means to make sure all the constraints for the buffer are satisfied.
    */
   drm_intel_bo_reference(temp_mt->bo);
   intel_miptree_release(&temp_mt);

   return buf;
}

static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        GLuint num_samples)
{
   assert(brw->gen >= 7); /* MCS only used on Gen7+ */
   assert(mt->mcs_buf == NULL);
   assert((mt->aux_disable & INTEL_AUX_DISABLE_MCS) == 0);

   /* Choose the correct format for the MCS buffer. All that really matters
    * is that we allocate the right buffer size, since we'll always be
    * accessing this miptree using MCS-specific hardware mechanisms, which
    * infer the correct format based on num_samples.
    */
   mesa_format format;
   switch (num_samples) {
   case 2:
   case 4:
      /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits
       * for each sample).
       */
      format = MESA_FORMAT_R_UNORM8;
      break;
   case 8:
      /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
       * for each sample, plus 8 padding bits).
       */
      format = MESA_FORMAT_R_UINT32;
      break;
   case 16:
      /* 64 bits/pixel are required for MCS data when using 16x MSAA (4 bits
       * for each sample).
       */
      format = MESA_FORMAT_RG_UINT32;
      break;
   default:
      unreachable("Unrecognized sample count in intel_miptree_alloc_mcs");
   }

   mt->mcs_buf =
      intel_mcs_miptree_buf_create(brw, mt,
                                   format,
                                   mt->logical_width0,
                                   mt->logical_height0,
                                   MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
   if (!mt->mcs_buf)
      return false;

   intel_miptree_init_mcs(brw, mt, 0xFF);

   /* Multisampled miptrees are only supported for a single level. */
   assert(mt->first_level == 0);
   intel_miptree_set_fast_clear_state(brw, mt, mt->first_level, 0,
                                      mt->logical_depth0,
                                      INTEL_FAST_CLEAR_STATE_CLEAR);

   return true;
}

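/* For illustration (added here, not part of the original source): the MCS
 * element sizes follow from the per-sample encodings quoted above, e.g. 4x
 * MSAA needs 4 samples x 2 bits = 8 bits per pixel, hence
 * MESA_FORMAT_R_UNORM8, while 16x MSAA needs 16 x 4 = 64 bits, hence
 * MESA_FORMAT_RG_UINT32.
 */
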
bool
intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 bool is_lossless_compressed)
{
   assert(mt->mcs_buf == NULL);
   assert(!(mt->aux_disable & (INTEL_AUX_DISABLE_MCS | INTEL_AUX_DISABLE_CCS)));

   struct isl_surf temp_main_surf;
   struct isl_surf temp_ccs_surf;

   /* First create an ISL representation of the main color surface and let
    * ISL calculate an equivalent CCS surface against it.
    */
   intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
   if (!isl_surf_get_ccs_surf(&brw->isl_dev, &temp_main_surf, &temp_ccs_surf))
      return false;

   assert(temp_ccs_surf.size &&
          (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));

   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
   if (!buf)
      return false;

   buf->size = temp_ccs_surf.size;
   buf->pitch = temp_ccs_surf.row_pitch;
   buf->qpitch = isl_surf_get_array_pitch_sa_rows(&temp_ccs_surf);

   /* In the case of compression, the MCS buffer needs to be initialised,
    * which requires it to be mapped to CPU space for writing right away.
    * Therefore do not use the GPU access flag, which can cause an
    * unnecessary delay if the backing pages happened to be just used by
    * the GPU.
    */
   const uint32_t alloc_flags =
      is_lossless_compressed ? 0 : BO_ALLOC_FOR_RENDER;
   uint32_t tiling = I915_TILING_Y;
   unsigned long pitch;

   /* ISL has a stricter set of alignment rules than the DRM allocator.
    * Therefore one can pass the ISL dimensions in terms of bytes instead of
    * trying to recalculate based on different format block sizes.
    */
   buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "ccs-miptree",
                                      buf->pitch, buf->size / buf->pitch,
                                      1, &tiling, &pitch, alloc_flags);
   if (buf->bo) {
      assert(pitch == buf->pitch);
      assert(tiling == I915_TILING_Y);
   } else {
      free(buf);
      return false;
   }

   mt->mcs_buf = buf;

   /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are
    * used for lossless compression, which requires initialisation similar
    * to multi-sample compression.
    */
   if (is_lossless_compressed) {
      /* Hardware sets the auxiliary buffer to all zeroes when it does a
       * full resolve. Initialize it accordingly in case the first renderer
       * is the CPU (or some other non-compression-aware party).
       *
       * This is also explicitly stated in the spec (MCS Buffer for Render
       * Target(s)):
       *    "If Software wants to enable Color Compression without Fast
       *     clear, Software needs to initialize MCS with zeros."
       */
      intel_miptree_init_mcs(brw, mt, 0);
      mt->msaa_layout = INTEL_MSAA_LAYOUT_CMS;
   }

   return true;
}

/**
 * Helper for intel_miptree_alloc_hiz() that sets
 * \c mt->level[level].has_hiz. Return true if and only if
 * \c has_hiz was set.
 */
static bool
intel_miptree_level_enable_hiz(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               uint32_t level)
{
   assert(mt->hiz_buf);

   if (brw->gen >= 8 || brw->is_haswell) {
      uint32_t width = minify(mt->physical_width0, level);
      uint32_t height = minify(mt->physical_height0, level);

      /* Disable HiZ for LOD > 0 unless the width is 8 aligned
       * and the height is 4 aligned. This allows our HiZ support
       * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
       * we can grow the width & height to allow the HiZ op to
       * force the proper size alignments.
       */
      if (level > 0 && ((width & 7) || (height & 3))) {
         DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
         return false;
      }
   }

   DBG("mt %p level %d: HiZ enabled\n", mt, level);
   mt->level[level].has_hiz = true;
   return true;
}

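/* For illustration (added here, not part of the original source): on Haswell
 * and Gen8+, a 100x100 depth miptree keeps HiZ at LOD 0, but LOD 1 (50x50)
 * fails the width check above (50 & 7 == 2), so HiZ is disabled for that
 * level while LOD 0 remains HiZ-enabled.
 */
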
/**
 * Helper for intel_miptree_alloc_hiz() that determines the required hiz
 * buffer dimensions and allocates a bo for the hiz buffer.
 */
static struct intel_miptree_hiz_buffer *
intel_gen7_hiz_buf_create(struct brw_context *brw,
                          struct intel_mipmap_tree *mt)
{
   unsigned z_width = mt->logical_width0;
   unsigned z_height = mt->logical_height0;
   const unsigned z_depth = MAX2(mt->logical_depth0, 1);
   unsigned hz_width, hz_height;
   struct intel_miptree_hiz_buffer *buf = calloc(sizeof(*buf), 1);

   if (!buf)
      return NULL;

   /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
    * adjustments required for Z_Height and Z_Width based on multisampling.
    */
   switch (mt->num_samples) {
   case 0:
   case 1:
      break;
   case 2:
   case 4:
      z_width *= 2;
      z_height *= 2;
      break;
   case 8:
      z_width *= 4;
      z_height *= 2;
      break;
   default:
      unreachable("unsupported sample count");
   }

   const unsigned vertical_align = 8; /* 'j' in the docs */
   const unsigned H0 = z_height;
   const unsigned h0 = ALIGN(H0, vertical_align);
   const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
   const unsigned Z0 = z_depth;

   /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
   hz_width = ALIGN(z_width, 16);

   if (mt->target == GL_TEXTURE_3D) {
      unsigned H_i = H0;
      unsigned Z_i = Z0;
      hz_height = 0;
      for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
         unsigned h_i = ALIGN(H_i, vertical_align);
         /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
         hz_height += h_i * Z_i;
         H_i = minify(H_i, 1);
         Z_i = minify(Z_i, 1);
      }
      /* HZ_Height =
       *    (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i)))
       */
      hz_height = DIV_ROUND_UP(hz_height, 2);
   } else {
      const unsigned hz_qpitch = h0 + h1 + (12 * vertical_align);
      /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
      hz_height = DIV_ROUND_UP(hz_qpitch * Z0, 2 * 8) * 8;
   }

   unsigned long pitch;
   uint32_t tiling = I915_TILING_Y;
   buf->aux_base.bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
                                               hz_width, hz_height, 1,
                                               &tiling, &pitch,
                                               BO_ALLOC_FOR_RENDER);
   if (!buf->aux_base.bo) {
      free(buf);
      return NULL;
   } else if (tiling != I915_TILING_Y) {
      drm_intel_bo_unreference(buf->aux_base.bo);
      free(buf);
      return NULL;
   }

   buf->aux_base.size = hz_width * hz_height;
   buf->aux_base.pitch = pitch;

   return buf;
}

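/* Worked example (added here, not in the original source): for a
 * single-sampled 64x64 2D depth miptree with one slice, h0 == 64 and
 * h1 == 32, so hz_qpitch == 64 + 32 + 12 * 8 == 192, giving
 * hz_height == DIV_ROUND_UP(192 * 1, 16) * 8 == 96 rows and
 * hz_width == ALIGN(64, 16) == 64 bytes.
 */
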
/**
 * Helper for intel_miptree_alloc_hiz() that determines the required hiz
 * buffer dimensions and allocates a bo for the hiz buffer.
 */
static struct intel_miptree_hiz_buffer *
intel_gen8_hiz_buf_create(struct brw_context *brw,
                          struct intel_mipmap_tree *mt)
{
   unsigned z_width = mt->logical_width0;
   unsigned z_height = mt->logical_height0;
   const unsigned z_depth = MAX2(mt->logical_depth0, 1);
   unsigned hz_width, hz_height;
   struct intel_miptree_hiz_buffer *buf = calloc(sizeof(*buf), 1);

   if (!buf)
      return NULL;

   /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
    * adjustments required for Z_Height and Z_Width based on multisampling.
    */
   if (brw->gen < 9) {
      switch (mt->num_samples) {
      case 0:
      case 1:
         break;
      case 2:
      case 4:
         z_width *= 2;
         z_height *= 2;
         break;
      case 8:
         z_width *= 4;
         z_height *= 2;
         break;
      default:
         unreachable("unsupported sample count");
      }
   }

   const unsigned vertical_align = 8; /* 'j' in the docs */
   const unsigned H0 = z_height;
   const unsigned h0 = ALIGN(H0, vertical_align);
   const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
   const unsigned Z0 = z_depth;

   /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
   hz_width = ALIGN(z_width, 16);

   unsigned H_i = H0;
   unsigned Z_i = Z0;
   unsigned sum_h_i = 0;
   unsigned hz_height_3d_sum = 0;
   for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
      unsigned i = level - mt->first_level;
      unsigned h_i = ALIGN(H_i, vertical_align);
      /* sum(i=2 to m; h_i) */
      if (i >= 2) {
         sum_h_i += h_i;
      }
      /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
      hz_height_3d_sum += h_i * Z_i;
      H_i = minify(H_i, 1);
      Z_i = minify(Z_i, 1);
   }
   /* HZ_QPitch = h0 + max(h1, sum(i=2 to m; h_i)) */
   buf->aux_base.qpitch = h0 + MAX2(h1, sum_h_i);

   if (mt->target == GL_TEXTURE_3D) {
      /* (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
      hz_height = DIV_ROUND_UP(hz_height_3d_sum, 2);
   } else {
      /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * Z_Depth */
      hz_height = DIV_ROUND_UP(buf->aux_base.qpitch, 2 * 8) * 8 * Z0;
   }

   unsigned long pitch;
   uint32_t tiling = I915_TILING_Y;
   buf->aux_base.bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
                                               hz_width, hz_height, 1,
                                               &tiling, &pitch,
                                               BO_ALLOC_FOR_RENDER);
   if (!buf->aux_base.bo) {
      free(buf);
      return NULL;
   } else if (tiling != I915_TILING_Y) {
      drm_intel_bo_unreference(buf->aux_base.bo);
      free(buf);
      return NULL;
   }

   buf->aux_base.size = hz_width * hz_height;
   buf->aux_base.pitch = pitch;

   return buf;
}


static struct intel_miptree_hiz_buffer *
intel_hiz_miptree_buf_create(struct brw_context *brw,
                             struct intel_mipmap_tree *mt)
{
   struct intel_miptree_hiz_buffer *buf = calloc(sizeof(*buf), 1);
   uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;

   if (brw->gen == 6)
      layout_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;

   if (!buf)
      return NULL;

   layout_flags |= MIPTREE_LAYOUT_TILING_ANY;
   buf->mt = intel_miptree_create(brw,
                                  mt->target,
                                  mt->format,
                                  mt->first_level,
                                  mt->last_level,
                                  mt->logical_width0,
                                  mt->logical_height0,
                                  mt->logical_depth0,
                                  mt->num_samples,
                                  layout_flags);
   if (!buf->mt) {
      free(buf);
      return NULL;
   }

   buf->aux_base.bo = buf->mt->bo;
   buf->aux_base.size = buf->mt->total_height * buf->mt->pitch;
   buf->aux_base.pitch = buf->mt->pitch;
   buf->aux_base.qpitch = buf->mt->qpitch;

   return buf;
}

static struct intel_miptree_hiz_buffer *
intel_hiz_miptree_buf_create(struct brw_context *brw,
                             struct intel_mipmap_tree *mt)
{
   struct intel_miptree_hiz_buffer *buf = calloc(sizeof(*buf), 1);
   uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;

   if (brw->gen == 6)
      layout_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;

   if (!buf)
      return NULL;

   layout_flags |= MIPTREE_LAYOUT_TILING_ANY;
   buf->mt = intel_miptree_create(brw,
                                  mt->target,
                                  mt->format,
                                  mt->first_level,
                                  mt->last_level,
                                  mt->logical_width0,
                                  mt->logical_height0,
                                  mt->logical_depth0,
                                  mt->num_samples,
                                  layout_flags);
   if (!buf->mt) {
      free(buf);
      return NULL;
   }

   buf->aux_base.bo = buf->mt->bo;
   buf->aux_base.size = buf->mt->total_height * buf->mt->pitch;
   buf->aux_base.pitch = buf->mt->pitch;
   buf->aux_base.qpitch = buf->mt->qpitch;

   return buf;
}

bool
intel_miptree_wants_hiz_buffer(struct brw_context *brw,
                               struct intel_mipmap_tree *mt)
{
   if (!brw->has_hiz)
      return false;

   if (mt->hiz_buf != NULL)
      return false;

   if (mt->aux_disable & INTEL_AUX_DISABLE_HIZ)
      return false;

   switch (mt->format) {
   case MESA_FORMAT_Z_FLOAT32:
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
   case MESA_FORMAT_Z24_UNORM_X8_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
   case MESA_FORMAT_Z_UNORM16:
      return true;
   default:
      return false;
   }
}

bool
intel_miptree_alloc_hiz(struct brw_context *brw,
                        struct intel_mipmap_tree *mt)
{
   assert(mt->hiz_buf == NULL);
   assert((mt->aux_disable & INTEL_AUX_DISABLE_HIZ) == 0);

   if (brw->gen == 7) {
      mt->hiz_buf = intel_gen7_hiz_buf_create(brw, mt);
   } else if (brw->gen >= 8) {
      mt->hiz_buf = intel_gen8_hiz_buf_create(brw, mt);
   } else {
      mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt);
   }

   if (!mt->hiz_buf)
      return false;

   /* Mark that all slices need a HiZ resolve. */
   for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
      if (!intel_miptree_level_enable_hiz(brw, mt, level))
         continue;

      for (unsigned layer = 0; layer < mt->level[level].depth; ++layer) {
         struct intel_resolve_map *m = malloc(sizeof(struct intel_resolve_map));
         exec_node_init(&m->link);
         m->level = level;
         m->layer = layer;
         m->need = BLORP_HIZ_OP_HIZ_RESOLVE;

         exec_list_push_tail(&mt->hiz_map, &m->link);
      }
   }

   return true;
}

/**
 * Can the miptree sample using the hiz buffer?
 */
bool
intel_miptree_sample_with_hiz(struct brw_context *brw,
                              struct intel_mipmap_tree *mt)
{
   /* It's unclear how well supported sampling from the hiz buffer is on
    * Gen8, so keep things conservative for now and never enable it unless
    * we're on SKL+.
    */
   if (brw->gen < 9) {
      return false;
   }

   if (!mt->hiz_buf) {
      return false;
   }

   /* It seems the hardware won't fall back to the depth buffer if some of
    * the mipmap levels aren't available in the HiZ buffer.  So we need all
    * levels of the texture to be HiZ enabled.
    */
   for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
      if (!intel_miptree_level_has_hiz(mt, level))
         return false;
   }

   /* If compressed multisampling is enabled, then we use it for the
    * auxiliary buffer instead.
    *
    * From the BDW PRM (Volume 2d: Command Reference: Structures,
    * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
    *
    *    "If this field is set to AUX_HIZ, Number of Multisamples must be
    *     MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
    *
    * There is no such blurb for 1D textures, but there is sufficient
    * evidence that this is broken on SKL+.
    */
   return (mt->num_samples <= 1 &&
           mt->target != GL_TEXTURE_3D &&
           mt->target != GL_TEXTURE_1D /* gen9+ restriction */);
}
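/* Editor's note: a concrete illustration of the checks above (an assumed
 * example, not from the PRM): on SKL, a GL_TEXTURE_2D depth texture with a
 * HiZ buffer and HiZ enabled on every defined level can be sampled through
 * HiZ, while the same texture bound as GL_TEXTURE_3D or GL_TEXTURE_1D, or
 * with num_samples > 1, falls back to sampling the depth buffer itself.
 */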
/**
 * Does the miptree slice have hiz enabled?
 */
bool
intel_miptree_level_has_hiz(struct intel_mipmap_tree *mt, uint32_t level)
{
   intel_miptree_check_level_layer(mt, level, 0);
   return mt->level[level].has_hiz;
}

void
intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
                                          uint32_t level,
                                          uint32_t layer)
{
   if (!intel_miptree_level_has_hiz(mt, level))
      return;

   intel_resolve_map_set(&mt->hiz_map,
                         level, layer, BLORP_HIZ_OP_HIZ_RESOLVE);
}

void
intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
                                            uint32_t level,
                                            uint32_t layer)
{
   if (!intel_miptree_level_has_hiz(mt, level))
      return;

   intel_resolve_map_set(&mt->hiz_map,
                         level, layer, BLORP_HIZ_OP_DEPTH_RESOLVE);
}

void
intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt,
                                                uint32_t level)
{
   uint32_t layer;
   uint32_t end_layer = mt->level[level].depth;

   for (layer = 0; layer < end_layer; layer++) {
      intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
   }
}

static bool
intel_miptree_slice_resolve(struct brw_context *brw,
                            struct intel_mipmap_tree *mt,
                            uint32_t level,
                            uint32_t layer,
                            enum blorp_hiz_op need)
{
   intel_miptree_check_level_layer(mt, level, layer);

   struct intel_resolve_map *item =
      intel_resolve_map_get(&mt->hiz_map, level, layer);

   if (!item || item->need != need)
      return false;

   intel_hiz_exec(brw, mt, level, layer, need);
   intel_resolve_map_remove(item);
   return true;
}

bool
intel_miptree_slice_resolve_hiz(struct brw_context *brw,
                                struct intel_mipmap_tree *mt,
                                uint32_t level,
                                uint32_t layer)
{
   return intel_miptree_slice_resolve(brw, mt, level, layer,
                                      BLORP_HIZ_OP_HIZ_RESOLVE);
}

bool
intel_miptree_slice_resolve_depth(struct brw_context *brw,
                                  struct intel_mipmap_tree *mt,
                                  uint32_t level,
                                  uint32_t layer)
{
   return intel_miptree_slice_resolve(brw, mt, level, layer,
                                      BLORP_HIZ_OP_DEPTH_RESOLVE);
}

static bool
intel_miptree_all_slices_resolve(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 enum blorp_hiz_op need)
{
   bool did_resolve = false;

   foreach_list_typed_safe(struct intel_resolve_map, map, link, &mt->hiz_map) {
      if (map->need != need)
         continue;

      intel_hiz_exec(brw, mt, map->level, map->layer, need);
      intel_resolve_map_remove(map);
      did_resolve = true;
   }

   return did_resolve;
}

bool
intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
                                     struct intel_mipmap_tree *mt)
{
   return intel_miptree_all_slices_resolve(brw, mt,
                                           BLORP_HIZ_OP_HIZ_RESOLVE);
}

bool
intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
                                       struct intel_mipmap_tree *mt)
{
   return intel_miptree_all_slices_resolve(brw, mt,
                                           BLORP_HIZ_OP_DEPTH_RESOLVE);
}
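/* Editor's note: a minimal sketch of how the hiz_map is meant to be driven
 * by a hypothetical caller (illustrative only, not a fixed calling
 * convention). After rendering dirties a depth slice, and before anything
 * reads it without HiZ, the pending entry is consumed:
 *
 *    intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
 *    ...
 *    intel_miptree_slice_resolve_depth(brw, mt, level, layer);
 *
 * The resolve functions return true only when a matching entry existed,
 * so redundant calls are cheap.
 */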
enum intel_fast_clear_state
intel_miptree_get_fast_clear_state(const struct intel_mipmap_tree *mt,
                                   unsigned level, unsigned layer)
{
   intel_miptree_check_level_layer(mt, level, layer);

   const struct intel_resolve_map *item =
      intel_resolve_map_const_get(&mt->color_resolve_map, level, layer);

   if (!item)
      return INTEL_FAST_CLEAR_STATE_RESOLVED;

   return item->fast_clear_state;
}

static void
intel_miptree_check_color_resolve(const struct brw_context *brw,
                                  const struct intel_mipmap_tree *mt,
                                  unsigned level, unsigned layer)
{
   if ((mt->aux_disable & INTEL_AUX_DISABLE_CCS) || !mt->mcs_buf)
      return;

   /* Fast color clear is supported for mipmapped surfaces only on Gen8+. */
   assert(brw->gen >= 8 ||
          (level == 0 && mt->first_level == 0 && mt->last_level == 0));

   /* Compression of arrayed msaa surfaces is supported. */
   if (mt->num_samples > 1)
      return;

   /* Fast color clear is supported for non-msaa arrays only on Gen8+. */
   assert(brw->gen >= 8 || (layer == 0 && mt->logical_depth0 == 1));

   (void)level;
   (void)layer;
}

void
intel_miptree_set_fast_clear_state(const struct brw_context *brw,
                                   struct intel_mipmap_tree *mt,
                                   unsigned level,
                                   unsigned first_layer,
                                   unsigned num_layers,
                                   enum intel_fast_clear_state new_state)
{
   /* Setting the state to resolved means removing the item from the list
    * altogether.
    */
   assert(new_state != INTEL_FAST_CLEAR_STATE_RESOLVED);

   intel_miptree_check_color_resolve(brw, mt, level, first_layer);

   assert(first_layer + num_layers <= mt->physical_depth0);

   for (unsigned i = 0; i < num_layers; i++)
      intel_resolve_map_set(&mt->color_resolve_map, level,
                            first_layer + i, new_state);
}

bool
intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt,
                                   unsigned start_level, unsigned num_levels,
                                   unsigned start_layer, unsigned num_layers)
{
   return intel_resolve_map_find_any(&mt->color_resolve_map,
                                     start_level, num_levels,
                                     start_layer, num_layers) != NULL;
}

void
intel_miptree_used_for_rendering(const struct brw_context *brw,
                                 struct intel_mipmap_tree *mt, unsigned level,
                                 unsigned start_layer, unsigned num_layers)
{
   const bool is_lossless_compressed =
      intel_miptree_is_lossless_compressed(brw, mt);

   for (unsigned i = 0; i < num_layers; ++i) {
      const enum intel_fast_clear_state fast_clear_state =
         intel_miptree_get_fast_clear_state(mt, level, start_layer + i);

      /* If the buffer was previously in fast clear state, change it to
       * unresolved state, since it won't be guaranteed to be clear after
       * rendering occurs.
       */
      if (is_lossless_compressed ||
          fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR) {
         intel_miptree_set_fast_clear_state(
            brw, mt, level, start_layer + i, 1,
            INTEL_FAST_CLEAR_STATE_UNRESOLVED);
      }
   }
}
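/* Editor's note: the color_resolve_map above implements a small per-slice
 * state machine. A slice absent from the map is RESOLVED; clearing code
 * elsewhere in the driver is expected to put it in the CLEAR state;
 * rendering to it (see intel_miptree_used_for_rendering) demotes it to
 * UNRESOLVED; and a color resolve removes the entry again. The RESOLVED
 * state is therefore never stored explicitly.
 */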
static bool
intel_miptree_needs_color_resolve(const struct brw_context *brw,
                                  const struct intel_mipmap_tree *mt,
                                  int flags)
{
   if (mt->aux_disable & INTEL_AUX_DISABLE_CCS)
      return false;

   const bool is_lossless_compressed =
      intel_miptree_is_lossless_compressed(brw, mt);

   /* From gen9 onwards there is a new compression scheme for single-sampled
    * surfaces called "lossless compressed".  These don't always need to be
    * resolved.
    */
   if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) && is_lossless_compressed)
      return false;

   /* Fast color clear resolves only make sense for non-MSAA buffers. */
   if (mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE && !is_lossless_compressed)
      return false;

   return true;
}

bool
intel_miptree_resolve_color(struct brw_context *brw,
                            struct intel_mipmap_tree *mt, unsigned level,
                            unsigned start_layer, unsigned num_layers,
                            int flags)
{
   intel_miptree_check_color_resolve(brw, mt, level, start_layer);

   if (!intel_miptree_needs_color_resolve(brw, mt, flags))
      return false;

   /* Arrayed fast clear is only supported for gen8+. */
   assert(brw->gen >= 8 || num_layers == 1);

   bool resolved = false;
   for (unsigned i = 0; i < num_layers; ++i) {
      intel_miptree_check_level_layer(mt, level, start_layer + i);

      struct intel_resolve_map *item =
         intel_resolve_map_get(&mt->color_resolve_map, level,
                               start_layer + i);

      if (item) {
         assert(item->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED);

         /* Resolve the layer this map entry refers to, not just the first
          * layer of the range.
          */
         brw_blorp_resolve_color(brw, mt, level, start_layer + i);
         intel_resolve_map_remove(item);
         resolved = true;
      }
   }

   return resolved;
}

void
intel_miptree_all_slices_resolve_color(struct brw_context *brw,
                                       struct intel_mipmap_tree *mt,
                                       int flags)
{
   if (!intel_miptree_needs_color_resolve(brw, mt, flags))
      return;

   foreach_list_typed_safe(struct intel_resolve_map, map, link,
                           &mt->color_resolve_map) {
      assert(map->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED);

      brw_blorp_resolve_color(brw, mt, map->level, map->layer);
      intel_resolve_map_remove(map);
   }
}

/**
 * Make it possible to share the BO backing the given miptree with another
 * process or another miptree.
 *
 * Fast color clears are unsafe with shared buffers, so we need to resolve
 * and then discard the MCS buffer, if present.  We also set the CCS and MCS
 * aux_disable flags to ensure that no MCS buffer gets allocated in the
 * future.
 *
 * HiZ is similarly unsafe with shared buffers.
 */
void
intel_miptree_make_shareable(struct brw_context *brw,
                             struct intel_mipmap_tree *mt)
{
   /* MCS buffers are also used for multisample buffers, but we can't resolve
    * away a multisample MCS buffer because it's an integral part of how the
    * pixel data is stored.  Fortunately this code path should never be
    * reached for multisample buffers.
    */
   assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE || mt->num_samples <= 1);

   if (mt->mcs_buf) {
      intel_miptree_all_slices_resolve_color(brw, mt, 0);
      mt->aux_disable |= (INTEL_AUX_DISABLE_CCS | INTEL_AUX_DISABLE_MCS);
      drm_intel_bo_unreference(mt->mcs_buf->bo);
      free(mt->mcs_buf);
      mt->mcs_buf = NULL;
   }

   if (mt->hiz_buf) {
      mt->aux_disable |= INTEL_AUX_DISABLE_HIZ;
      intel_miptree_all_slices_resolve_depth(brw, mt);
      intel_miptree_hiz_buffer_free(mt->hiz_buf);
      mt->hiz_buf = NULL;
   }
}
/**
 * \brief Get pointer offset into stencil buffer.
 *
 * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
 * must decode the tile's layout in software.
 *
 * See
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
 *     Format.
 *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
 *
 * Even though the returned offset is always positive, the return type is
 * signed due to
 *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
 *    mesa: Fix return type of _mesa_get_format_bytes() (#37351)
 */
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
   uint32_t tile_size = 4096;
   uint32_t tile_width = 64;
   uint32_t tile_height = 64;
   uint32_t row_size = 64 * stride;

   uint32_t tile_x = x / tile_width;
   uint32_t tile_y = y / tile_height;

   /* The byte's address relative to the tile's base address. */
   uint32_t byte_x = x % tile_width;
   uint32_t byte_y = y % tile_height;

   uintptr_t u = tile_y * row_size
               + tile_x * tile_size
               + 512 * (byte_x / 8)
               +  64 * (byte_y / 8)
               +  32 * ((byte_y / 4) % 2)
               +  16 * ((byte_x / 4) % 2)
               +   8 * ((byte_y / 2) % 2)
               +   4 * ((byte_x / 2) % 2)
               +   2 * (byte_y % 2)
               +   1 * (byte_x % 2);

   if (swizzled) {
      /* adjust for bit6 swizzling */
      if (((byte_x / 8) % 2) == 1) {
         if (((byte_y / 8) % 2) == 0) {
            u += 64;
         } else {
            u -= 64;
         }
      }
   }

   return u;
}
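/* Editor's note: a worked example of the decode above, with assumed inputs
 * and swizzling disabled. For (x, y) = (67, 5): tile_x = 1, tile_y = 0,
 * byte_x = 3, byte_y = 5, so
 *    u = 1 * 4096 + 32 * 1 + 4 * 1 + 2 * 1 + 1 * 1 = 4135
 * (the 512, 64, 16, and 8 terms are all zero here). The same (x, y) in a
 * linear buffer of the same stride would sit at y * stride + x instead.
 */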
void
intel_miptree_updownsample(struct brw_context *brw,
                           struct intel_mipmap_tree *src,
                           struct intel_mipmap_tree *dst)
{
   brw_blorp_blit_miptrees(brw,
                           src, 0 /* level */, 0 /* layer */,
                           src->format, SWIZZLE_XYZW,
                           dst, 0 /* level */, 0 /* layer */, dst->format,
                           0, 0,
                           src->logical_width0, src->logical_height0,
                           0, 0,
                           dst->logical_width0, dst->logical_height0,
                           GL_NEAREST, false, false /*mirror x, y*/,
                           false, false);

   if (src->stencil_mt) {
      brw_blorp_blit_miptrees(brw,
                              src->stencil_mt, 0 /* level */, 0 /* layer */,
                              src->stencil_mt->format, SWIZZLE_XYZW,
                              dst->stencil_mt, 0 /* level */, 0 /* layer */,
                              dst->stencil_mt->format,
                              0, 0,
                              src->logical_width0, src->logical_height0,
                              0, 0,
                              dst->logical_width0, dst->logical_height0,
                              GL_NEAREST, false, false /*mirror x, y*/,
                              false, false /* decode/encode srgb */);
   }
}

void
intel_update_r8stencil(struct brw_context *brw,
                       struct intel_mipmap_tree *mt)
{
   assert(brw->gen >= 7);
   struct intel_mipmap_tree *src =
      mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
   if (!src || brw->gen >= 8 || !src->r8stencil_needs_update)
      return;

   if (!mt->r8stencil_mt) {
      const uint32_t r8stencil_flags =
         MIPTREE_LAYOUT_ACCELERATED_UPLOAD | MIPTREE_LAYOUT_TILING_Y |
         MIPTREE_LAYOUT_DISABLE_AUX;
      assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD */
      mt->r8stencil_mt = intel_miptree_create(brw,
                                              src->target,
                                              MESA_FORMAT_R_UINT8,
                                              src->first_level,
                                              src->last_level,
                                              src->logical_width0,
                                              src->logical_height0,
                                              src->logical_depth0,
                                              src->num_samples,
                                              r8stencil_flags);
      assert(mt->r8stencil_mt);
   }

   struct intel_mipmap_tree *dst = mt->r8stencil_mt;

   for (int level = src->first_level; level <= src->last_level; level++) {
      const unsigned depth = src->level[level].depth;
      const int layers_per_blit =
         (dst->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
          dst->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
         dst->num_samples : 1;

      for (unsigned layer = 0; layer < depth; layer++) {
         brw_blorp_blit_miptrees(brw,
                                 src, level, layer,
                                 src->format, SWIZZLE_X,
                                 dst, level, layers_per_blit * layer,
                                 MESA_FORMAT_R_UNORM8,
                                 0, 0,
                                 minify(src->logical_width0, level),
                                 minify(src->logical_height0, level),
                                 0, 0,
                                 minify(dst->logical_width0, level),
                                 minify(dst->logical_height0, level),
                                 GL_NEAREST, false, false /*mirror x, y*/,
                                 false, false /* decode/encode srgb */);
      }
   }

   brw_render_cache_set_check_flush(brw, dst->bo);
   src->r8stencil_needs_update = false;
}

static void *
intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
{
   /* CPU accesses to color buffers don't understand fast color clears, so
    * resolve any pending fast color clears before we map.
    */
   intel_miptree_all_slices_resolve_color(brw, mt, 0);

   drm_intel_bo *bo = mt->bo;

   if (drm_intel_bo_references(brw->batch.bo, bo))
      intel_batchbuffer_flush(brw);

   if (mt->tiling != I915_TILING_NONE)
      brw_bo_map_gtt(brw, bo, "miptree");
   else
      brw_bo_map(brw, bo, true, "miptree");

   return bo->virtual;
}

static void
intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
{
   drm_intel_bo_unmap(mt->bo);
}

static void
intel_miptree_map_gtt(struct brw_context *brw,
                      struct intel_mipmap_tree *mt,
                      struct intel_miptree_map *map,
                      unsigned int level, unsigned int slice)
{
   unsigned int bw, bh;
   void *base;
   unsigned int image_x, image_y;
   intptr_t x = map->x;
   intptr_t y = map->y;

   /* For compressed formats, the stride is the number of bytes per
    * row of blocks.  intel_miptree_get_image_offset() already does
    * the divide.
    */
   _mesa_get_format_block_size(mt->format, &bw, &bh);
   assert(y % bh == 0);
   assert(x % bw == 0);
   y /= bh;
   x /= bw;

   /* Check the result of the raw mapping before applying mt->offset, so a
    * failed map cannot be turned into a bogus non-NULL base pointer.
    */
   base = intel_miptree_map_raw(brw, mt);

   if (base == NULL)
      map->ptr = NULL;
   else {
      base += mt->offset;

      /* Note that in the case of cube maps, the caller must have passed the
       * slice number referencing the face.
       */
      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
      x += image_x;
      y += image_y;

      map->stride = mt->pitch;
      map->ptr = base + y * map->stride + x * mt->cpp;
   }

   DBG("%s: %d,%d %dx%d from mt %p (%s) "
       "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       x, y, map->ptr, map->stride);
}

static void
intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
{
   intel_miptree_unmap_raw(mt);
}
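/* Editor's note: a short worked example of the block math in
 * intel_miptree_map_gtt(), with assumed values. For a compressed format
 * with 4x4 blocks and map->x = map->y = 8, the divides yield block
 * coordinates x = y = 2, so the returned pointer is
 *    base + (image_y + 2) * mt->pitch + (image_x + 2) * mt->cpp,
 * where mt->pitch counts bytes per row of blocks and mt->cpp bytes per
 * block.
 */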
static void
intel_miptree_map_blit(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       struct intel_miptree_map *map,
                       unsigned int level, unsigned int slice)
{
   map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
                                         /* first_level */ 0,
                                         /* last_level */ 0,
                                         map->w, map->h, 1,
                                         /* samples */ 0,
                                         MIPTREE_LAYOUT_TILING_NONE);

   if (!map->linear_mt) {
      fprintf(stderr, "Failed to allocate blit temporary\n");
      goto fail;
   }
   map->stride = map->linear_mt->pitch;

   /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
    * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
    * invalidate is set, since we'll be writing the whole rectangle from our
    * temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      if (!intel_miptree_copy(brw,
                              mt, level, slice, map->x, map->y,
                              map->linear_mt, 0, 0, 0, 0,
                              map->w, map->h)) {
         fprintf(stderr, "Failed to blit\n");
         goto fail;
      }
   }

   map->ptr = intel_miptree_map_raw(brw, map->linear_mt);

   DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       level, slice, map->ptr, map->stride);

   return;

fail:
   intel_miptree_release(&map->linear_mt);
   map->ptr = NULL;
   map->stride = 0;
}

static void
intel_miptree_unmap_blit(struct brw_context *brw,
                         struct intel_mipmap_tree *mt,
                         struct intel_miptree_map *map,
                         unsigned int level,
                         unsigned int slice)
{
   struct gl_context *ctx = &brw->ctx;

   intel_miptree_unmap_raw(map->linear_mt);

   if (map->mode & GL_MAP_WRITE_BIT) {
      bool ok = intel_miptree_copy(brw,
                                   map->linear_mt, 0, 0, 0, 0,
                                   mt, level, slice, map->x, map->y,
                                   map->w, map->h);
      WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
   }

   intel_miptree_release(&map->linear_mt);
}

/**
 * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
 */
#if defined(USE_SSE41)
static void
intel_miptree_map_movntdqa(struct brw_context *brw,
                           struct intel_mipmap_tree *mt,
                           struct intel_miptree_map *map,
                           unsigned int level, unsigned int slice)
{
   assert(map->mode & GL_MAP_READ_BIT);
   assert(!(map->mode & GL_MAP_WRITE_BIT));

   DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
       map->x, map->y, map->w, map->h,
       mt, _mesa_get_format_name(mt->format),
       level, slice, map->ptr, map->stride);

   /* Map the original image */
   uint32_t image_x;
   uint32_t image_y;
   intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
   image_x += map->x;
   image_y += map->y;

   void *src = intel_miptree_map_raw(brw, mt);
   if (!src)
      return;

   src += mt->offset;

   src += image_y * mt->pitch;
   src += image_x * mt->cpp;

   /* Due to the pixel offsets for the particular image being mapped, our
    * src pointer may not be 16-byte aligned.  However, if the pitch is
    * divisible by 16, then the amount by which it's misaligned will remain
    * consistent from row to row.
    */
   assert((mt->pitch % 16) == 0);
   const int misalignment = ((uintptr_t) src) & 15;

   /* Create an untiled temporary buffer for the mapping. */
   const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);

   map->stride = ALIGN(misalignment + width_bytes, 16);

   map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
   if (map->buffer == NULL) {
      /* If the temporary allocation fails, fail the mapping rather than
       * dereferencing a NULL destination below.
       */
      intel_miptree_unmap_raw(mt);
      map->ptr = NULL;
      return;
   }

   /* Offset the destination so it has the same misalignment as src. */
   map->ptr = map->buffer + misalignment;

   assert((((uintptr_t) map->ptr) & 15) == misalignment);

   for (uint32_t y = 0; y < map->h; y++) {
      void *dst_ptr = map->ptr + y * map->stride;
      void *src_ptr = src + y * mt->pitch;

      _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
   }

   intel_miptree_unmap_raw(mt);
}

static void
intel_miptree_unmap_movntdqa(struct brw_context *brw,
                             struct intel_mipmap_tree *mt,
                             struct intel_miptree_map *map,
                             unsigned int level,
                             unsigned int slice)
{
   _mesa_align_free(map->buffer);
   map->buffer = NULL;
   map->ptr = NULL;
}
#endif
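/* Editor's note: a worked example of the misalignment handling above, with
 * assumed numbers. If the first source pixel starts 8 bytes past a 16-byte
 * boundary and a row is 100 bytes wide, then misalignment = 8 and
 * map->stride = ALIGN(8 + 100, 16) = 112, so every destination row starts
 * at the same offset-within-16 as its source row, keeping the bulk of each
 * row aligned for the streaming (MOVNTDQA) loads.
 */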
static void
intel_miptree_map_s8(struct brw_context *brw,
                     struct intel_mipmap_tree *mt,
                     struct intel_miptree_map *map,
                     unsigned int level, unsigned int slice)
{
   map->stride = map->w;
   map->buffer = map->ptr = malloc(map->stride * map->h);
   if (!map->buffer)
      return;

   /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
    * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
    * invalidate is set, since we'll be writing the whole rectangle from our
    * temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      uint8_t *untiled_s8_map = map->ptr;
      uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
      unsigned int image_x, image_y;

      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t offset = intel_offset_S8(mt->pitch,
                                               x + image_x + map->x,
                                               y + image_y + map->y,
                                               brw->has_swizzling);
            untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
         }
      }

      intel_miptree_unmap_raw(mt);

      DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
   } else {
      DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->ptr, map->stride);
   }
}

static void
intel_miptree_unmap_s8(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       struct intel_miptree_map *map,
                       unsigned int level,
                       unsigned int slice)
{
   if (map->mode & GL_MAP_WRITE_BIT) {
      unsigned int image_x, image_y;
      uint8_t *untiled_s8_map = map->ptr;
      uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);

      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t offset = intel_offset_S8(mt->pitch,
                                               image_x + x + map->x,
                                               image_y + y + map->y,
                                               brw->has_swizzling);
            tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
         }
      }

      intel_miptree_unmap_raw(mt);
   }

   free(map->buffer);
}

static void
intel_miptree_map_etc(struct brw_context *brw,
                      struct intel_mipmap_tree *mt,
                      struct intel_miptree_map *map,
                      unsigned int level,
                      unsigned int slice)
{
   assert(mt->etc_format != MESA_FORMAT_NONE);
   if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
      assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
   }

   assert(map->mode & GL_MAP_WRITE_BIT);
   assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);

   map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
   map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
                                                map->w, map->h, 1));
   map->ptr = map->buffer;
}

static void
intel_miptree_unmap_etc(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        struct intel_miptree_map *map,
                        unsigned int level,
                        unsigned int slice)
{
   uint32_t image_x;
   uint32_t image_y;
   intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);

   image_x += map->x;
   image_y += map->y;

   uint8_t *dst = intel_miptree_map_raw(brw, mt)
                + image_y * mt->pitch
                + image_x * mt->cpp;

   if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
      _mesa_etc1_unpack_rgba8888(dst, mt->pitch,
                                 map->ptr, map->stride,
                                 map->w, map->h);
   else
      _mesa_unpack_etc2_format(dst, mt->pitch,
                               map->ptr, map->stride,
                               map->w, map->h, mt->etc_format);

   intel_miptree_unmap_raw(mt);
   free(map->buffer);
}
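/* Editor's note: the ETC path above is write-only by design. The map hands
 * the caller a malloc'ed staging buffer sized for the compressed ETC data;
 * since the miptree's real storage is an uncompressed RGBA format, the
 * unmap then decodes that staging data into the miptree with
 * _mesa_etc1_unpack_rgba8888() / _mesa_unpack_etc2_format(). That is why
 * GL_MAP_WRITE_BIT and GL_MAP_INVALIDATE_RANGE_BIT are asserted on map.
 */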
/**
 * Mapping function for packed depth/stencil miptrees backed by real separate
 * miptrees for depth and stencil.
 *
 * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
 * separate from the depth buffer.  Yet at the GL API level, we have to expose
 * packed depth/stencil textures and FBO attachments, and Mesa core expects to
 * be able to map that memory for texture storage and glReadPixels-type
 * operations.  We give Mesa core that access by mallocing a temporary and
 * copying the data between the actual backing store and the temporary.
 */
static void
intel_miptree_map_depthstencil(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               struct intel_miptree_map *map,
                               unsigned int level, unsigned int slice)
{
   struct intel_mipmap_tree *z_mt = mt;
   struct intel_mipmap_tree *s_mt = mt->stencil_mt;
   bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
   int packed_bpp = map_z32f_x24s8 ? 8 : 4;

   map->stride = map->w * packed_bpp;
   map->buffer = map->ptr = malloc(map->stride * map->h);
   if (!map->buffer)
      return;

   /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
    * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
    * invalidate is set, since we'll be writing the whole rectangle from our
    * temporary buffer back out.
    */
   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
      uint32_t *packed_map = map->ptr;
      uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
      uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
      unsigned int s_image_x, s_image_y;
      unsigned int z_image_x, z_image_y;

      intel_miptree_get_image_offset(s_mt, level, slice,
                                     &s_image_x, &s_image_y);
      intel_miptree_get_image_offset(z_mt, level, slice,
                                     &z_image_x, &z_image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            int map_x = map->x + x, map_y = map->y + y;
            ptrdiff_t s_offset = intel_offset_S8(s_mt->pitch,
                                                 map_x + s_image_x,
                                                 map_y + s_image_y,
                                                 brw->has_swizzling);
            ptrdiff_t z_offset = ((map_y + z_image_y) *
                                  (z_mt->pitch / 4) +
                                  (map_x + z_image_x));
            uint8_t s = s_map[s_offset];
            uint32_t z = z_map[z_offset];

            if (map_z32f_x24s8) {
               packed_map[(y * map->w + x) * 2 + 0] = z;
               packed_map[(y * map->w + x) * 2 + 1] = s;
            } else {
               packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
            }
         }
      }

      intel_miptree_unmap_raw(s_mt);
      intel_miptree_unmap_raw(z_mt);

      DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
          map->x, map->y, map->w, map->h,
          z_mt, map->x + z_image_x, map->y + z_image_y,
          s_mt, map->x + s_image_x, map->y + s_image_y,
          map->ptr, map->stride);
   } else {
      DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
          mt, map->ptr, map->stride);
   }
}

static void
intel_miptree_unmap_depthstencil(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 struct intel_miptree_map *map,
                                 unsigned int level,
                                 unsigned int slice)
{
   struct intel_mipmap_tree *z_mt = mt;
   struct intel_mipmap_tree *s_mt = mt->stencil_mt;
   bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;

   if (map->mode & GL_MAP_WRITE_BIT) {
      uint32_t *packed_map = map->ptr;
      uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
      uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
      unsigned int s_image_x, s_image_y;
      unsigned int z_image_x, z_image_y;

      intel_miptree_get_image_offset(s_mt, level, slice,
                                     &s_image_x, &s_image_y);
      intel_miptree_get_image_offset(z_mt, level, slice,
                                     &z_image_x, &z_image_y);

      for (uint32_t y = 0; y < map->h; y++) {
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t s_offset = intel_offset_S8(s_mt->pitch,
                                                 x + s_image_x + map->x,
                                                 y + s_image_y + map->y,
                                                 brw->has_swizzling);
            ptrdiff_t z_offset = ((y + z_image_y + map->y) *
                                  (z_mt->pitch / 4) +
                                  (x + z_image_x + map->x));

            if (map_z32f_x24s8) {
               z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
               s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
            } else {
               uint32_t packed = packed_map[y * map->w + x];
               s_map[s_offset] = packed >> 24;
               z_map[z_offset] = packed;
            }
         }
      }

      intel_miptree_unmap_raw(s_mt);
      intel_miptree_unmap_raw(z_mt);

      DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
          map->x, map->y, map->w, map->h,
          z_mt, _mesa_get_format_name(z_mt->format),
          map->x + z_image_x, map->y + z_image_y,
          s_mt, map->x + s_image_x, map->y + s_image_y,
          map->ptr, map->stride);
   }

   free(map->buffer);
}
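/* Editor's note: a worked example of the S8Z24 packing above, with assumed
 * values. For stencil s = 0xff and a 24-bit depth value z = 0x123456, the
 * packed texel is (0xff << 24) | (0x123456 & 0x00ffffff) = 0xff123456.
 * Unmapping reverses it with s = packed >> 24 and z = the low 24 bits;
 * the high byte stored into z_map lands in the X8 part of
 * Z24_UNORM_X8_UINT and is ignored by the depth format.
 */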
/**
 * Create and attach a map to the miptree at (level, slice). Return the
 * attached map.
 */
static struct intel_miptree_map*
intel_miptree_attach_map(struct intel_mipmap_tree *mt,
                         unsigned int level,
                         unsigned int slice,
                         unsigned int x,
                         unsigned int y,
                         unsigned int w,
                         unsigned int h,
                         GLbitfield mode)
{
   struct intel_miptree_map *map = calloc(1, sizeof(*map));

   if (!map)
      return NULL;

   assert(mt->level[level].slice[slice].map == NULL);
   mt->level[level].slice[slice].map = map;

   map->mode = mode;
   map->x = x;
   map->y = y;
   map->w = w;
   map->h = h;

   return map;
}

/**
 * Release the map at (level, slice).
 */
static void
intel_miptree_release_map(struct intel_mipmap_tree *mt,
                          unsigned int level,
                          unsigned int slice)
{
   struct intel_miptree_map **map;

   map = &mt->level[level].slice[slice].map;
   free(*map);
   *map = NULL;
}

static bool
can_blit_slice(struct intel_mipmap_tree *mt,
               unsigned int level, unsigned int slice)
{
   /* See intel_miptree_blit() for details on the 32k pitch limit. */
   if (mt->pitch >= 32768)
      return false;

   return true;
}

static bool
use_intel_miptree_map_blit(struct brw_context *brw,
                           struct intel_mipmap_tree *mt,
                           GLbitfield mode,
                           unsigned int level,
                           unsigned int slice)
{
   if (brw->has_llc &&
       /* It's probably not worth swapping to the blit ring because of
        * all the overhead involved.  But, we must use the blitter for
        * surfaces with INTEL_MIPTREE_TRMODE_{YF,YS}.
        */
       (!(mode & GL_MAP_WRITE_BIT) ||
        mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) &&
       !mt->compressed &&
       (mt->tiling == I915_TILING_X ||
        /* Prior to Sandybridge, the blitter can't handle Y tiling */
        (brw->gen >= 6 && mt->tiling == I915_TILING_Y) ||
        /* Fast copy blit on skl+ supports all tiling formats. */
        brw->gen >= 9) &&
       can_blit_slice(mt, level, slice))
      return true;

   if (mt->tiling != I915_TILING_NONE &&
       mt->bo->size >= brw->max_gtt_map_object_size) {
      assert(can_blit_slice(mt, level, slice));
      return true;
   }

   return false;
}
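/* Editor's note: intel_miptree_map() below picks exactly one mapping
 * strategy per (level, slice), in this order: the S8 detiling path for
 * stencil-only miptrees, the ETC staging path for fake-ETC miptrees, the
 * packed depth/stencil path when a separate stencil miptree exists, the
 * blit path when use_intel_miptree_map_blit() approves, the MOVNTDQA
 * streaming path for SSE4.1-capable read-only maps of suitably pitched
 * uncompressed surfaces, and a direct GTT map as the fallback.
 */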
/**
 * Parameter \a out_stride has type ptrdiff_t not because the buffer stride
 * may exceed 32 bits but to diminish the likelihood of subtle bugs caused
 * by overflow in pointer arithmetic.
 *
 * If you call this function and use \a out_stride, then you're doing pointer
 * arithmetic on \a out_ptr.  The type of \a out_stride doesn't prevent all
 * bugs.  The caller must still take care to avoid 32-bit overflow errors in
 * all arithmetic expressions that contain buffer offsets and pixel sizes,
 * which usually have type uint32_t or GLuint.
 */
void
intel_miptree_map(struct brw_context *brw,
                  struct intel_mipmap_tree *mt,
                  unsigned int level,
                  unsigned int slice,
                  unsigned int x,
                  unsigned int y,
                  unsigned int w,
                  unsigned int h,
                  GLbitfield mode,
                  void **out_ptr,
                  ptrdiff_t *out_stride)
{
   struct intel_miptree_map *map;

   assert(mt->num_samples <= 1);

   map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
   if (!map) {
      *out_ptr = NULL;
      *out_stride = 0;
      return;
   }

   intel_miptree_slice_resolve_depth(brw, mt, level, slice);
   if (map->mode & GL_MAP_WRITE_BIT) {
      intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
   }

   if (mt->format == MESA_FORMAT_S_UINT8) {
      intel_miptree_map_s8(brw, mt, map, level, slice);
   } else if (mt->etc_format != MESA_FORMAT_NONE &&
              !(mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_map_etc(brw, mt, map, level, slice);
   } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_map_depthstencil(brw, mt, map, level, slice);
   } else if (use_intel_miptree_map_blit(brw, mt, mode, level, slice)) {
      intel_miptree_map_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
   } else if (!(mode & GL_MAP_WRITE_BIT) &&
              !mt->compressed && cpu_has_sse4_1 &&
              (mt->pitch % 16 == 0)) {
      intel_miptree_map_movntdqa(brw, mt, map, level, slice);
#endif
   } else {
      /* intel_miptree_map_gtt() doesn't support surfaces with Yf/Ys tiling. */
      assert(mt->tr_mode == INTEL_MIPTREE_TRMODE_NONE);
      intel_miptree_map_gtt(brw, mt, map, level, slice);
   }

   *out_ptr = map->ptr;
   *out_stride = map->stride;

   if (map->ptr == NULL)
      intel_miptree_release_map(mt, level, slice);
}

void
intel_miptree_unmap(struct brw_context *brw,
                    struct intel_mipmap_tree *mt,
                    unsigned int level,
                    unsigned int slice)
{
   struct intel_miptree_map *map = mt->level[level].slice[slice].map;

   assert(mt->num_samples <= 1);

   if (!map)
      return;

   DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
       mt, _mesa_get_format_name(mt->format), level, slice);

   if (mt->format == MESA_FORMAT_S_UINT8) {
      intel_miptree_unmap_s8(brw, mt, map, level, slice);
   } else if (mt->etc_format != MESA_FORMAT_NONE &&
              !(map->mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_unmap_etc(brw, mt, map, level, slice);
   } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
      intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
   } else if (map->linear_mt) {
      intel_miptree_unmap_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
   } else if (map->buffer && cpu_has_sse4_1) {
      intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
#endif
   } else {
      intel_miptree_unmap_gtt(mt);
   }

   intel_miptree_release_map(mt, level, slice);
}
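/* Editor's note: a minimal usage sketch for the map/unmap pair above, from
 * a hypothetical caller (the names out, out_stride, and row_bytes are
 * illustrative):
 *
 *    void *ptr;
 *    ptrdiff_t stride;
 *
 *    intel_miptree_map(brw, mt, level, slice, 0, 0, width, height,
 *                      GL_MAP_READ_BIT, &ptr, &stride);
 *    if (ptr) {
 *       for (unsigned row = 0; row < height; row++)
 *          memcpy(out + row * out_stride,
 *                 (const char *) ptr + row * stride, row_bytes);
 *       intel_miptree_unmap(brw, mt, level, slice);
 *    }
 *
 * Note the stride is used for pointer stepping exactly as the comment
 * above intel_miptree_map() warns.
 */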
enum isl_surf_dim
get_isl_surf_dim(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY:
      return ISL_SURF_DIM_1D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   case GL_TEXTURE_EXTERNAL_OES:
      return ISL_SURF_DIM_2D;

   case GL_TEXTURE_3D:
      return ISL_SURF_DIM_3D;
   }

   unreachable("Invalid texture target");
}

enum isl_dim_layout
get_isl_dim_layout(const struct gen_device_info *devinfo, uint32_t tiling,
                   GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY:
      return (devinfo->gen >= 9 && tiling == I915_TILING_NONE ?
              ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   case GL_TEXTURE_EXTERNAL_OES:
      return ISL_DIM_LAYOUT_GEN4_2D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
              ISL_DIM_LAYOUT_GEN4_2D);

   case GL_TEXTURE_3D:
      return (devinfo->gen >= 9 ?
              ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
   }

   unreachable("Invalid texture target");
}

enum isl_tiling
intel_miptree_get_isl_tiling(const struct intel_mipmap_tree *mt)
{
   if (mt->format == MESA_FORMAT_S_UINT8) {
      return ISL_TILING_W;
   } else {
      switch (mt->tiling) {
      case I915_TILING_NONE:
         return ISL_TILING_LINEAR;
      case I915_TILING_X:
         return ISL_TILING_X;
      case I915_TILING_Y:
         switch (mt->tr_mode) {
         case INTEL_MIPTREE_TRMODE_NONE:
            return ISL_TILING_Y0;
         case INTEL_MIPTREE_TRMODE_YF:
            return ISL_TILING_Yf;
         case INTEL_MIPTREE_TRMODE_YS:
            return ISL_TILING_Ys;
         default:
            unreachable("Invalid tiled resource mode");
         }
      default:
         unreachable("Invalid tiling mode");
      }
   }
}
void
intel_miptree_get_isl_surf(struct brw_context *brw,
                           const struct intel_mipmap_tree *mt,
                           struct isl_surf *surf)
{
   surf->dim = get_isl_surf_dim(mt->target);
   surf->dim_layout = get_isl_dim_layout(&brw->screen->devinfo,
                                         mt->tiling, mt->target);

   if (mt->num_samples > 1) {
      switch (mt->msaa_layout) {
      case INTEL_MSAA_LAYOUT_IMS:
         surf->msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
         break;
      case INTEL_MSAA_LAYOUT_UMS:
      case INTEL_MSAA_LAYOUT_CMS:
         surf->msaa_layout = ISL_MSAA_LAYOUT_ARRAY;
         break;
      default:
         unreachable("Invalid MSAA layout");
      }
   } else {
      surf->msaa_layout = ISL_MSAA_LAYOUT_NONE;
   }

   surf->tiling = intel_miptree_get_isl_tiling(mt);

   if (mt->format == MESA_FORMAT_S_UINT8) {
      /* The ISL definition of row_pitch matches the surface state pitch field
       * a bit better than intel_mipmap_tree.  In particular, ISL incorporates
       * the factor of 2 for W-tiling in row_pitch.
       */
      surf->row_pitch = 2 * mt->pitch;
   } else {
      surf->row_pitch = mt->pitch;
   }

   surf->format = translate_tex_format(brw, mt->format, false);

   if (brw->gen >= 9) {
      if (surf->dim == ISL_SURF_DIM_1D && surf->tiling == ISL_TILING_LINEAR) {
         /* For gen9 1-D surfaces, intel_mipmap_tree has a bogus alignment. */
         surf->image_alignment_el = isl_extent3d(64, 1, 1);
      } else {
         /* On gen9+, intel_mipmap_tree stores the horizontal and vertical
          * alignment in terms of surface elements like we want.
          */
         surf->image_alignment_el = isl_extent3d(mt->halign, mt->valign, 1);
      }
   } else {
      /* On earlier gens it's stored in pixels. */
      unsigned bw, bh;
      _mesa_get_format_block_size(mt->format, &bw, &bh);
      surf->image_alignment_el =
         isl_extent3d(mt->halign / bw, mt->valign / bh, 1);
   }

   surf->logical_level0_px.width = mt->logical_width0;
   surf->logical_level0_px.height = mt->logical_height0;
   if (surf->dim == ISL_SURF_DIM_3D) {
      surf->logical_level0_px.depth = mt->logical_depth0;
      surf->logical_level0_px.array_len = 1;
   } else {
      surf->logical_level0_px.depth = 1;
      surf->logical_level0_px.array_len = mt->logical_depth0;
   }

   surf->phys_level0_sa.width = mt->physical_width0;
   surf->phys_level0_sa.height = mt->physical_height0;
   if (surf->dim == ISL_SURF_DIM_3D) {
      surf->phys_level0_sa.depth = mt->physical_depth0;
      surf->phys_level0_sa.array_len = 1;
   } else {
      surf->phys_level0_sa.depth = 1;
      surf->phys_level0_sa.array_len = mt->physical_depth0;
   }

   surf->levels = mt->last_level + 1;
   surf->samples = MAX2(mt->num_samples, 1);

   surf->size = 0; /* TODO */
   surf->alignment = 0; /* TODO */

   switch (surf->dim_layout) {
   case ISL_DIM_LAYOUT_GEN4_2D:
   case ISL_DIM_LAYOUT_GEN4_3D:
      if (brw->gen >= 9) {
         surf->array_pitch_el_rows = mt->qpitch;
      } else {
         unsigned bw, bh;
         _mesa_get_format_block_size(mt->format, &bw, &bh);
         assert(mt->qpitch % bh == 0);
         surf->array_pitch_el_rows = mt->qpitch / bh;
      }
      break;
   case ISL_DIM_LAYOUT_GEN9_1D:
      surf->array_pitch_el_rows = 1;
      break;
   }

   switch (mt->array_layout) {
   case ALL_LOD_IN_EACH_SLICE:
      surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_FULL;
      break;
   case ALL_SLICES_AT_EACH_LOD:
      surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_COMPACT;
      break;
   default:
      unreachable("Invalid array layout");
   }

   GLenum base_format = _mesa_get_format_base_format(mt->format);
   switch (base_format) {
   case GL_DEPTH_COMPONENT:
      surf->usage = ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
      break;
   case GL_STENCIL_INDEX:
      surf->usage = ISL_SURF_USAGE_STENCIL_BIT;
      if (brw->gen >= 8)
         surf->usage |= ISL_SURF_USAGE_TEXTURE_BIT;
      break;
   case GL_DEPTH_STENCIL:
      /* In this case we only texture from the depth part */
      surf->usage = ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT |
                    ISL_SURF_USAGE_TEXTURE_BIT;
      break;
   default:
      surf->usage = ISL_SURF_USAGE_TEXTURE_BIT;
      /* OR in the render-target bit so the texture bit isn't lost. */
      if (brw->format_supported_as_render_target[mt->format])
         surf->usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
      break;
   }

   if (_mesa_is_cube_map_texture(mt->target))
      surf->usage |= ISL_SURF_USAGE_CUBE_BIT;
}
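/* Editor's note: a worked example of the pre-gen9 alignment conversion
 * above, with assumed values. For a compressed format with 4x4 blocks and
 * mt->halign = 32, mt->valign = 32 (in pixels), the alignment in surface
 * elements becomes isl_extent3d(32 / 4, 32 / 4, 1) = (8, 8, 1), matching
 * ISL's convention of counting compression blocks rather than pixels.
 */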
/* WARNING: THE SURFACE CREATED BY THIS FUNCTION IS NOT COMPLETE AND CANNOT BE
 * USED FOR ANY REAL CALCULATIONS.  THE ONLY VALID USE OF SUCH A SURFACE IS TO
 * PASS IT INTO isl_surf_fill_state.
 */
void
intel_miptree_get_aux_isl_surf(struct brw_context *brw,
                               const struct intel_mipmap_tree *mt,
                               struct isl_surf *surf,
                               enum isl_aux_usage *usage)
{
   uint32_t aux_pitch, aux_qpitch;
   if (mt->mcs_buf) {
      aux_pitch = mt->mcs_buf->pitch;
      aux_qpitch = mt->mcs_buf->qpitch;

      if (mt->num_samples > 1) {
         assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS);
         *usage = ISL_AUX_USAGE_MCS;
      } else if (intel_miptree_is_lossless_compressed(brw, mt)) {
         assert(brw->gen >= 9);
         *usage = ISL_AUX_USAGE_CCS_E;
      } else if ((mt->aux_disable & INTEL_AUX_DISABLE_CCS) == 0) {
         *usage = ISL_AUX_USAGE_CCS_D;
      } else {
         unreachable("Invalid MCS miptree");
      }
   } else if (mt->hiz_buf) {
      if (mt->hiz_buf->mt) {
         aux_pitch = mt->hiz_buf->mt->pitch;
         aux_qpitch = mt->hiz_buf->mt->qpitch;
      } else {
         aux_pitch = mt->hiz_buf->aux_base.pitch;
         aux_qpitch = mt->hiz_buf->aux_base.qpitch;
      }

      *usage = ISL_AUX_USAGE_HIZ;
   } else {
      *usage = ISL_AUX_USAGE_NONE;
      return;
   }

   /* Start with a copy of the original surface. */
   intel_miptree_get_isl_surf(brw, mt, surf);

   /* Figure out the format and tiling of the auxiliary surface */
   switch (*usage) {
   case ISL_AUX_USAGE_NONE:
      unreachable("Invalid auxiliary usage");

   case ISL_AUX_USAGE_HIZ:
      isl_surf_get_hiz_surf(&brw->isl_dev, surf, surf);
      break;

   case ISL_AUX_USAGE_MCS:
      /*
       * From the SKL PRM:
       *    "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E,
       *     HALIGN 16 must be used."
       */
      if (brw->gen >= 9)
         assert(mt->halign == 16);

      isl_surf_get_mcs_surf(&brw->isl_dev, surf, surf);
      break;

   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      /*
       * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
       *
       *    "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
       *
       * From the hardware spec for GEN9:
       *
       *    "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E,
       *     HALIGN 16 must be used."
       */
      assert(mt->num_samples <= 1);
      if (brw->gen >= 8)
         assert(mt->halign == 16);

      isl_surf_get_ccs_surf(&brw->isl_dev, surf, surf);
      break;
   }

   /* We want the pitch of the actual aux buffer. */
   surf->row_pitch = aux_pitch;

   /* Auxiliary surfaces in ISL have compressed formats and array_pitch_el_rows
    * is in elements.  This doesn't match intel_mipmap_tree::qpitch which is
    * in elements of the primary color surface so we have to divide by the
    * compression block height.
    */
   surf->array_pitch_el_rows =
      aux_qpitch / isl_format_get_layout(surf->format)->bh;
}

union isl_color_value
intel_miptree_get_isl_clear_color(struct brw_context *brw,
                                  const struct intel_mipmap_tree *mt)
{
   union isl_color_value clear_color;

   if (_mesa_get_format_base_format(mt->format) == GL_DEPTH_COMPONENT) {
      clear_color.i32[0] = mt->depth_clear_value;
      clear_color.i32[1] = 0;
      clear_color.i32[2] = 0;
      clear_color.i32[3] = 0;
   } else if (brw->gen >= 9) {
      clear_color.i32[0] = mt->gen9_fast_clear_color.i[0];
      clear_color.i32[1] = mt->gen9_fast_clear_color.i[1];
      clear_color.i32[2] = mt->gen9_fast_clear_color.i[2];
      clear_color.i32[3] = mt->gen9_fast_clear_color.i[3];
   } else if (_mesa_is_format_integer(mt->format)) {
      clear_color.i32[0] = (mt->fast_clear_color_value & (1u << 31)) != 0;
      clear_color.i32[1] = (mt->fast_clear_color_value & (1u << 30)) != 0;
      clear_color.i32[2] = (mt->fast_clear_color_value & (1u << 29)) != 0;
      clear_color.i32[3] = (mt->fast_clear_color_value & (1u << 28)) != 0;
   } else {
      clear_color.f32[0] = (mt->fast_clear_color_value & (1u << 31)) != 0;
      clear_color.f32[1] = (mt->fast_clear_color_value & (1u << 30)) != 0;
      clear_color.f32[2] = (mt->fast_clear_color_value & (1u << 29)) != 0;
      clear_color.f32[3] = (mt->fast_clear_color_value & (1u << 28)) != 0;
   }

   return clear_color;
}
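/* Editor's note: a worked example of the pre-gen9 clear-color decoding
 * above, with an assumed register value. If fast_clear_color_value has
 * bits 31 and 28 set, the reconstructed clear color is (1, 0, 0, 1): each
 * channel is a single on/off bit, so pre-SKL hardware can only fast-clear
 * to 0 or 1 per channel, while gen9+ stores a full 32-bit value per
 * channel in gen9_fast_clear_color.
 */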