1 /* 2 * Copyright 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include <assert.h> 25 #include <stdarg.h> 26 #include <stdio.h> 27 28 #include "isl.h" 29 #include "isl_gen4.h" 30 #include "isl_gen6.h" 31 #include "isl_gen7.h" 32 #include "isl_gen8.h" 33 #include "isl_gen9.h" 34 #include "isl_priv.h" 35 36 void PRINTFLIKE(3, 4) UNUSED 37 __isl_finishme(const char *file, int line, const char *fmt, ...) 38 { 39 va_list ap; 40 char buf[512]; 41 42 va_start(ap, fmt); 43 vsnprintf(buf, sizeof(buf), fmt, ap); 44 va_end(ap); 45 46 fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); 47 } 48 49 static const struct { 50 uint8_t size; 51 uint8_t align; 52 uint8_t addr_offset; 53 uint8_t aux_addr_offset; 54 } ss_infos[] = { 55 [4] = {24, 32, 4}, 56 [5] = {24, 32, 4}, 57 [6] = {24, 32, 4}, 58 [7] = {32, 32, 4, 24}, 59 [8] = {64, 64, 32, 40}, 60 [9] = {64, 64, 32, 40}, 61 }; 62 63 void 64 isl_device_init(struct isl_device *dev, 65 const struct gen_device_info *info, 66 bool has_bit6_swizzling) 67 { 68 dev->info = info; 69 dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; 70 dev->has_bit6_swizzling = has_bit6_swizzling; 71 72 /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some 73 * device properties at buildtime. Verify that the macros with the device 74 * properties chosen during runtime. 75 */ 76 ISL_DEV_GEN_SANITIZE(dev); 77 ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev); 78 79 /* Did we break hiz or stencil? */ 80 if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) 81 assert(info->has_hiz_and_separate_stencil); 82 if (info->must_use_separate_stencil) 83 assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); 84 85 dev->ss.size = ss_infos[ISL_DEV_GEN(dev)].size; 86 dev->ss.align = ss_infos[ISL_DEV_GEN(dev)].align; 87 dev->ss.addr_offset = ss_infos[ISL_DEV_GEN(dev)].addr_offset; 88 dev->ss.aux_addr_offset = ss_infos[ISL_DEV_GEN(dev)].aux_addr_offset; 89 } 90 91 /** 92 * @brief Query the set of multisamples supported by the device. 93 * 94 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always 95 * supported. 96 */ 97 isl_sample_count_mask_t ATTRIBUTE_CONST 98 isl_device_get_sample_counts(struct isl_device *dev) 99 { 100 if (ISL_DEV_GEN(dev) >= 9) { 101 return ISL_SAMPLE_COUNT_1_BIT | 102 ISL_SAMPLE_COUNT_2_BIT | 103 ISL_SAMPLE_COUNT_4_BIT | 104 ISL_SAMPLE_COUNT_8_BIT | 105 ISL_SAMPLE_COUNT_16_BIT; 106 } else if (ISL_DEV_GEN(dev) >= 8) { 107 return ISL_SAMPLE_COUNT_1_BIT | 108 ISL_SAMPLE_COUNT_2_BIT | 109 ISL_SAMPLE_COUNT_4_BIT | 110 ISL_SAMPLE_COUNT_8_BIT; 111 } else if (ISL_DEV_GEN(dev) >= 7) { 112 return ISL_SAMPLE_COUNT_1_BIT | 113 ISL_SAMPLE_COUNT_4_BIT | 114 ISL_SAMPLE_COUNT_8_BIT; 115 } else if (ISL_DEV_GEN(dev) >= 6) { 116 return ISL_SAMPLE_COUNT_1_BIT | 117 ISL_SAMPLE_COUNT_4_BIT; 118 } else { 119 return ISL_SAMPLE_COUNT_1_BIT; 120 } 121 } 122 123 /** 124 * @param[out] info is written only on success 125 */ 126 static bool 127 isl_tiling_get_info(const struct isl_device *dev, 128 enum isl_tiling tiling, 129 uint32_t format_bpb, 130 struct isl_tile_info *tile_info) 131 { 132 const uint32_t bs = format_bpb / 8; 133 struct isl_extent2d logical_el, phys_B; 134 135 if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) { 136 /* It is possible to have non-power-of-two formats in a tiled buffer. 137 * The easiest way to handle this is to treat the tile as if it is three 138 * times as wide. This way no pixel will ever cross a tile boundary. 139 * This really only works on legacy X and Y tiling formats. 140 */ 141 assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0); 142 assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3)); 143 return isl_tiling_get_info(dev, tiling, format_bpb / 3, tile_info); 144 } 145 146 switch (tiling) { 147 case ISL_TILING_LINEAR: 148 assert(bs > 0); 149 logical_el = isl_extent2d(1, 1); 150 phys_B = isl_extent2d(bs, 1); 151 break; 152 153 case ISL_TILING_X: 154 assert(bs > 0); 155 logical_el = isl_extent2d(512 / bs, 8); 156 phys_B = isl_extent2d(512, 8); 157 break; 158 159 case ISL_TILING_Y0: 160 assert(bs > 0); 161 logical_el = isl_extent2d(128 / bs, 32); 162 phys_B = isl_extent2d(128, 32); 163 break; 164 165 case ISL_TILING_W: 166 assert(bs == 1); 167 logical_el = isl_extent2d(64, 64); 168 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch: 169 * 170 * "If the surface is a stencil buffer (and thus has Tile Mode set 171 * to TILEMODE_WMAJOR), the pitch must be set to 2x the value 172 * computed based on width, as the stencil buffer is stored with two 173 * rows interleaved." 174 * 175 * This, together with the fact that stencil buffers are referred to as 176 * being Y-tiled in the PRMs for older hardware implies that the 177 * physical size of a W-tile is actually the same as for a Y-tile. 178 */ 179 phys_B = isl_extent2d(128, 32); 180 break; 181 182 case ISL_TILING_Yf: 183 case ISL_TILING_Ys: { 184 if (ISL_DEV_GEN(dev) < 9) 185 return false; 186 187 if (!isl_is_pow2(bs)) 188 return false; 189 190 bool is_Ys = tiling == ISL_TILING_Ys; 191 192 assert(bs > 0); 193 unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); 194 unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); 195 196 logical_el = isl_extent2d(width / bs, height); 197 phys_B = isl_extent2d(width, height); 198 break; 199 } 200 201 case ISL_TILING_HIZ: 202 /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4 203 * 128bpb format. The tiling has the same physical dimensions as 204 * Y-tiling but actually has two HiZ columns per Y-tiled column. 205 */ 206 assert(bs == 16); 207 logical_el = isl_extent2d(16, 16); 208 phys_B = isl_extent2d(128, 32); 209 break; 210 211 case ISL_TILING_CCS: 212 /* CCS surfaces are required to have one of the GENX_CCS_* formats which 213 * have a block size of 1 or 2 bits per block and each CCS element 214 * corresponds to one cache-line pair in the main surface. From the Sky 215 * Lake PRM Vol. 12 in the section on planes: 216 * 217 * "The Color Control Surface (CCS) contains the compression status 218 * of the cache-line pairs. The compression state of the cache-line 219 * pair is specified by 2 bits in the CCS. Each CCS cache-line 220 * represents an area on the main surface of 16x16 sets of 128 byte 221 * Y-tiled cache-line-pairs. CCS is always Y tiled." 222 * 223 * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines. 224 * Since each cache line corresponds to a 16x16 set of cache-line pairs, 225 * that yields total tile area of 128x128 cache-line pairs or CCS 226 * elements. On older hardware, each CCS element is 1 bit and the tile 227 * is 128x256 elements. 228 */ 229 assert(format_bpb == 1 || format_bpb == 2); 230 logical_el = isl_extent2d(128, 256 / format_bpb); 231 phys_B = isl_extent2d(128, 32); 232 break; 233 234 default: 235 unreachable("not reached"); 236 } /* end switch */ 237 238 *tile_info = (struct isl_tile_info) { 239 .tiling = tiling, 240 .format_bpb = format_bpb, 241 .logical_extent_el = logical_el, 242 .phys_extent_B = phys_B, 243 }; 244 245 return true; 246 } 247 248 /** 249 * @param[out] tiling is set only on success 250 */ 251 static bool 252 isl_surf_choose_tiling(const struct isl_device *dev, 253 const struct isl_surf_init_info *restrict info, 254 enum isl_tiling *tiling) 255 { 256 isl_tiling_flags_t tiling_flags = info->tiling_flags; 257 258 /* HiZ surfaces always use the HiZ tiling */ 259 if (info->usage & ISL_SURF_USAGE_HIZ_BIT) { 260 assert(info->format == ISL_FORMAT_HIZ); 261 assert(tiling_flags == ISL_TILING_HIZ_BIT); 262 *tiling = ISL_TILING_HIZ; 263 return true; 264 } 265 266 /* CCS surfaces always use the CCS tiling */ 267 if (info->usage & ISL_SURF_USAGE_CCS_BIT) { 268 assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS); 269 assert(tiling_flags == ISL_TILING_CCS_BIT); 270 *tiling = ISL_TILING_CCS; 271 return true; 272 } 273 274 if (ISL_DEV_GEN(dev) >= 6) { 275 isl_gen6_filter_tiling(dev, info, &tiling_flags); 276 } else { 277 isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); 278 isl_gen6_filter_tiling(dev, info, &tiling_flags); 279 } 280 281 #define CHOOSE(__tiling) \ 282 do { \ 283 if (tiling_flags & (1u << (__tiling))) { \ 284 *tiling = (__tiling); \ 285 return true; \ 286 } \ 287 } while (0) 288 289 /* Of the tiling modes remaining, choose the one that offers the best 290 * performance. 291 */ 292 293 if (info->dim == ISL_SURF_DIM_1D) { 294 /* Prefer linear for 1D surfaces because they do not benefit from 295 * tiling. To the contrary, tiling leads to wasted memory and poor 296 * memory locality due to the swizzling and alignment restrictions 297 * required in tiled surfaces. 298 */ 299 CHOOSE(ISL_TILING_LINEAR); 300 } 301 302 CHOOSE(ISL_TILING_Ys); 303 CHOOSE(ISL_TILING_Yf); 304 CHOOSE(ISL_TILING_Y0); 305 CHOOSE(ISL_TILING_X); 306 CHOOSE(ISL_TILING_W); 307 CHOOSE(ISL_TILING_LINEAR); 308 309 #undef CHOOSE 310 311 /* No tiling mode accomodates the inputs. */ 312 return false; 313 } 314 315 static bool 316 isl_choose_msaa_layout(const struct isl_device *dev, 317 const struct isl_surf_init_info *info, 318 enum isl_tiling tiling, 319 enum isl_msaa_layout *msaa_layout) 320 { 321 if (ISL_DEV_GEN(dev) >= 8) { 322 return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); 323 } else if (ISL_DEV_GEN(dev) >= 7) { 324 return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); 325 } else if (ISL_DEV_GEN(dev) >= 6) { 326 return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); 327 } else { 328 return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); 329 } 330 } 331 332 struct isl_extent2d 333 isl_get_interleaved_msaa_px_size_sa(uint32_t samples) 334 { 335 assert(isl_is_pow2(samples)); 336 337 /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level 338 * Sizes (p133): 339 * 340 * If the surface is multisampled and it is a depth or stencil surface 341 * or Multisampled Surface StorageFormat in SURFACE_STATE is 342 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before 343 * proceeding: [...] 344 */ 345 return (struct isl_extent2d) { 346 .width = 1 << ((ffs(samples) - 0) / 2), 347 .height = 1 << ((ffs(samples) - 1) / 2), 348 }; 349 } 350 351 static void 352 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, 353 uint32_t *width, uint32_t *height) 354 { 355 const struct isl_extent2d px_size_sa = 356 isl_get_interleaved_msaa_px_size_sa(samples); 357 358 if (width) 359 *width = isl_align(*width, 2) * px_size_sa.width; 360 if (height) 361 *height = isl_align(*height, 2) * px_size_sa.height; 362 } 363 364 static enum isl_array_pitch_span 365 isl_choose_array_pitch_span(const struct isl_device *dev, 366 const struct isl_surf_init_info *restrict info, 367 enum isl_dim_layout dim_layout, 368 const struct isl_extent4d *phys_level0_sa) 369 { 370 switch (dim_layout) { 371 case ISL_DIM_LAYOUT_GEN9_1D: 372 case ISL_DIM_LAYOUT_GEN4_2D: 373 if (ISL_DEV_GEN(dev) >= 8) { 374 /* QPitch becomes programmable in Broadwell. So choose the 375 * most compact QPitch possible in order to conserve memory. 376 * 377 * From the Broadwell PRM >> Volume 2d: Command Reference: Structures 378 * >> RENDER_SURFACE_STATE Surface QPitch (p325): 379 * 380 * - Software must ensure that this field is set to a value 381 * sufficiently large such that the array slices in the surface 382 * do not overlap. Refer to the Memory Data Formats section for 383 * information on how surfaces are stored in memory. 384 * 385 * - This field specifies the distance in rows between array 386 * slices. It is used only in the following cases: 387 * 388 * - Surface Array is enabled OR 389 * - Number of Mulitsamples is not NUMSAMPLES_1 and 390 * Multisampled Surface Storage Format set to MSFMT_MSS OR 391 * - Surface Type is SURFTYPE_CUBE 392 */ 393 return ISL_ARRAY_PITCH_SPAN_COMPACT; 394 } else if (ISL_DEV_GEN(dev) >= 7) { 395 /* Note that Ivybridge introduces 396 * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the 397 * driver more control over the QPitch. 398 */ 399 400 if (phys_level0_sa->array_len == 1) { 401 /* The hardware will never use the QPitch. So choose the most 402 * compact QPitch possible in order to conserve memory. 403 */ 404 return ISL_ARRAY_PITCH_SPAN_COMPACT; 405 } 406 407 if (isl_surf_usage_is_depth_or_stencil(info->usage) || 408 (info->usage & ISL_SURF_USAGE_HIZ_BIT)) { 409 /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> 410 * Section 6.18.4.7: Surface Arrays (p112): 411 * 412 * If Surface Array Spacing is set to ARYSPC_FULL (note that 413 * the depth buffer and stencil buffer have an implied value of 414 * ARYSPC_FULL): 415 */ 416 return ISL_ARRAY_PITCH_SPAN_FULL; 417 } 418 419 if (info->levels == 1) { 420 /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing 421 * to ARYSPC_LOD0. 422 */ 423 return ISL_ARRAY_PITCH_SPAN_COMPACT; 424 } 425 426 return ISL_ARRAY_PITCH_SPAN_FULL; 427 } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && 428 ISL_DEV_USE_SEPARATE_STENCIL(dev) && 429 isl_surf_usage_is_stencil(info->usage)) { 430 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 431 * Graphics Core >> Section 7.18.3.7: Surface Arrays: 432 * 433 * The separate stencil buffer does not support mip mapping, thus 434 * the storage for LODs other than LOD 0 is not needed. 435 */ 436 assert(info->levels == 1); 437 assert(phys_level0_sa->array_len == 1); 438 return ISL_ARRAY_PITCH_SPAN_COMPACT; 439 } else { 440 if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && 441 ISL_DEV_USE_SEPARATE_STENCIL(dev) && 442 isl_surf_usage_is_stencil(info->usage)) { 443 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 444 * Graphics Core >> Section 7.18.3.7: Surface Arrays: 445 * 446 * The separate stencil buffer does not support mip mapping, 447 * thus the storage for LODs other than LOD 0 is not needed. 448 */ 449 assert(info->levels == 1); 450 assert(phys_level0_sa->array_len == 1); 451 return ISL_ARRAY_PITCH_SPAN_COMPACT; 452 } 453 454 if (phys_level0_sa->array_len == 1) { 455 /* The hardware will never use the QPitch. So choose the most 456 * compact QPitch possible in order to conserve memory. 457 */ 458 return ISL_ARRAY_PITCH_SPAN_COMPACT; 459 } 460 461 return ISL_ARRAY_PITCH_SPAN_FULL; 462 } 463 464 case ISL_DIM_LAYOUT_GEN4_3D: 465 /* The hardware will never use the QPitch. So choose the most 466 * compact QPitch possible in order to conserve memory. 467 */ 468 return ISL_ARRAY_PITCH_SPAN_COMPACT; 469 } 470 471 unreachable("bad isl_dim_layout"); 472 return ISL_ARRAY_PITCH_SPAN_FULL; 473 } 474 475 static void 476 isl_choose_image_alignment_el(const struct isl_device *dev, 477 const struct isl_surf_init_info *restrict info, 478 enum isl_tiling tiling, 479 enum isl_dim_layout dim_layout, 480 enum isl_msaa_layout msaa_layout, 481 struct isl_extent3d *image_align_el) 482 { 483 if (info->format == ISL_FORMAT_HIZ) { 484 assert(ISL_DEV_GEN(dev) >= 6); 485 /* HiZ surfaces are always aligned to 16x8 pixels in the primary surface 486 * which works out to 2x2 HiZ elments. 487 */ 488 *image_align_el = isl_extent3d(2, 2, 1); 489 return; 490 } 491 492 if (ISL_DEV_GEN(dev) >= 9) { 493 isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout, 494 msaa_layout, image_align_el); 495 } else if (ISL_DEV_GEN(dev) >= 8) { 496 isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout, 497 msaa_layout, image_align_el); 498 } else if (ISL_DEV_GEN(dev) >= 7) { 499 isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout, 500 msaa_layout, image_align_el); 501 } else if (ISL_DEV_GEN(dev) >= 6) { 502 isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout, 503 msaa_layout, image_align_el); 504 } else { 505 isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout, 506 msaa_layout, image_align_el); 507 } 508 } 509 510 static enum isl_dim_layout 511 isl_surf_choose_dim_layout(const struct isl_device *dev, 512 enum isl_surf_dim logical_dim, 513 enum isl_tiling tiling) 514 { 515 if (ISL_DEV_GEN(dev) >= 9) { 516 switch (logical_dim) { 517 case ISL_SURF_DIM_1D: 518 /* From the Sky Lake PRM Vol. 5, "1D Surfaces": 519 * 520 * One-dimensional surfaces use a tiling mode of linear. 521 * Technically, they are not tiled resources, but the Tiled 522 * Resource Mode field in RENDER_SURFACE_STATE is still used to 523 * indicate the alignment requirements for this linear surface 524 * (See 1D Alignment requirements for how 4K and 64KB Tiled 525 * Resource Modes impact alignment). Alternatively, a 1D surface 526 * can be defined as a 2D tiled surface (e.g. TileY or TileX) with 527 * a height of 0. 528 * 529 * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear 530 * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used. 531 */ 532 if (tiling == ISL_TILING_LINEAR) 533 return ISL_DIM_LAYOUT_GEN9_1D; 534 else 535 return ISL_DIM_LAYOUT_GEN4_2D; 536 case ISL_SURF_DIM_2D: 537 case ISL_SURF_DIM_3D: 538 return ISL_DIM_LAYOUT_GEN4_2D; 539 } 540 } else { 541 switch (logical_dim) { 542 case ISL_SURF_DIM_1D: 543 case ISL_SURF_DIM_2D: 544 return ISL_DIM_LAYOUT_GEN4_2D; 545 case ISL_SURF_DIM_3D: 546 return ISL_DIM_LAYOUT_GEN4_3D; 547 } 548 } 549 550 unreachable("bad isl_surf_dim"); 551 return ISL_DIM_LAYOUT_GEN4_2D; 552 } 553 554 /** 555 * Calculate the physical extent of the surface's first level, in units of 556 * surface samples. The result is aligned to the format's compression block. 557 */ 558 static void 559 isl_calc_phys_level0_extent_sa(const struct isl_device *dev, 560 const struct isl_surf_init_info *restrict info, 561 enum isl_dim_layout dim_layout, 562 enum isl_tiling tiling, 563 enum isl_msaa_layout msaa_layout, 564 struct isl_extent4d *phys_level0_sa) 565 { 566 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 567 568 if (isl_format_is_yuv(info->format)) 569 isl_finishme("%s:%s: YUV format", __FILE__, __func__); 570 571 switch (info->dim) { 572 case ISL_SURF_DIM_1D: 573 assert(info->height == 1); 574 assert(info->depth == 1); 575 assert(info->samples == 1); 576 577 switch (dim_layout) { 578 case ISL_DIM_LAYOUT_GEN4_3D: 579 unreachable("bad isl_dim_layout"); 580 581 case ISL_DIM_LAYOUT_GEN9_1D: 582 case ISL_DIM_LAYOUT_GEN4_2D: 583 *phys_level0_sa = (struct isl_extent4d) { 584 .w = isl_align_npot(info->width, fmtl->bw), 585 .h = fmtl->bh, 586 .d = 1, 587 .a = info->array_len, 588 }; 589 break; 590 } 591 break; 592 593 case ISL_SURF_DIM_2D: 594 assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D); 595 596 if (tiling == ISL_TILING_Ys && info->samples > 1) 597 isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); 598 599 switch (msaa_layout) { 600 case ISL_MSAA_LAYOUT_NONE: 601 assert(info->depth == 1); 602 assert(info->samples == 1); 603 604 *phys_level0_sa = (struct isl_extent4d) { 605 .w = isl_align_npot(info->width, fmtl->bw), 606 .h = isl_align_npot(info->height, fmtl->bh), 607 .d = 1, 608 .a = info->array_len, 609 }; 610 break; 611 612 case ISL_MSAA_LAYOUT_ARRAY: 613 assert(info->depth == 1); 614 assert(info->levels == 1); 615 assert(isl_format_supports_multisampling(dev->info, info->format)); 616 assert(fmtl->bw == 1 && fmtl->bh == 1); 617 618 *phys_level0_sa = (struct isl_extent4d) { 619 .w = info->width, 620 .h = info->height, 621 .d = 1, 622 .a = info->array_len * info->samples, 623 }; 624 break; 625 626 case ISL_MSAA_LAYOUT_INTERLEAVED: 627 assert(info->depth == 1); 628 assert(info->levels == 1); 629 assert(isl_format_supports_multisampling(dev->info, info->format)); 630 631 *phys_level0_sa = (struct isl_extent4d) { 632 .w = info->width, 633 .h = info->height, 634 .d = 1, 635 .a = info->array_len, 636 }; 637 638 isl_msaa_interleaved_scale_px_to_sa(info->samples, 639 &phys_level0_sa->w, 640 &phys_level0_sa->h); 641 642 phys_level0_sa->w = isl_align(phys_level0_sa->w, fmtl->bw); 643 phys_level0_sa->h = isl_align(phys_level0_sa->h, fmtl->bh); 644 break; 645 } 646 break; 647 648 case ISL_SURF_DIM_3D: 649 assert(info->array_len == 1); 650 assert(info->samples == 1); 651 652 if (fmtl->bd > 1) { 653 isl_finishme("%s:%s: compression block with depth > 1", 654 __FILE__, __func__); 655 } 656 657 switch (dim_layout) { 658 case ISL_DIM_LAYOUT_GEN9_1D: 659 unreachable("bad isl_dim_layout"); 660 661 case ISL_DIM_LAYOUT_GEN4_2D: 662 assert(ISL_DEV_GEN(dev) >= 9); 663 664 *phys_level0_sa = (struct isl_extent4d) { 665 .w = isl_align_npot(info->width, fmtl->bw), 666 .h = isl_align_npot(info->height, fmtl->bh), 667 .d = 1, 668 .a = info->depth, 669 }; 670 break; 671 672 case ISL_DIM_LAYOUT_GEN4_3D: 673 assert(ISL_DEV_GEN(dev) < 9); 674 *phys_level0_sa = (struct isl_extent4d) { 675 .w = isl_align(info->width, fmtl->bw), 676 .h = isl_align(info->height, fmtl->bh), 677 .d = info->depth, 678 .a = 1, 679 }; 680 break; 681 } 682 break; 683 } 684 } 685 686 /** 687 * A variant of isl_calc_phys_slice0_extent_sa() specific to 688 * ISL_DIM_LAYOUT_GEN4_2D. 689 */ 690 static void 691 isl_calc_phys_slice0_extent_sa_gen4_2d( 692 const struct isl_device *dev, 693 const struct isl_surf_init_info *restrict info, 694 enum isl_msaa_layout msaa_layout, 695 const struct isl_extent3d *image_align_sa, 696 const struct isl_extent4d *phys_level0_sa, 697 struct isl_extent2d *phys_slice0_sa) 698 { 699 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 700 701 assert(phys_level0_sa->depth == 1); 702 703 if (info->levels == 1) { 704 /* Do not pad the surface to the image alignment. Instead, pad it only 705 * to the pixel format's block alignment. 706 * 707 * For tiled surfaces, using a reduced alignment here avoids wasting CPU 708 * cycles on the below mipmap layout caluclations. Reducing the 709 * alignment here is safe because we later align the row pitch and array 710 * pitch to the tile boundary. It is safe even for 711 * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled 712 * to accomodate the interleaved samples. 713 * 714 * For linear surfaces, reducing the alignment here permits us to later 715 * choose an arbitrary, non-aligned row pitch. If the surface backs 716 * a VkBuffer, then an arbitrary pitch may be needed to accomodate 717 * VkBufferImageCopy::bufferRowLength. 718 */ 719 *phys_slice0_sa = (struct isl_extent2d) { 720 .w = isl_align_npot(phys_level0_sa->w, fmtl->bw), 721 .h = isl_align_npot(phys_level0_sa->h, fmtl->bh), 722 }; 723 return; 724 } 725 726 uint32_t slice_top_w = 0; 727 uint32_t slice_bottom_w = 0; 728 uint32_t slice_left_h = 0; 729 uint32_t slice_right_h = 0; 730 731 uint32_t W0 = phys_level0_sa->w; 732 uint32_t H0 = phys_level0_sa->h; 733 734 for (uint32_t l = 0; l < info->levels; ++l) { 735 uint32_t W = isl_minify(W0, l); 736 uint32_t H = isl_minify(H0, l); 737 738 uint32_t w = isl_align_npot(W, image_align_sa->w); 739 uint32_t h = isl_align_npot(H, image_align_sa->h); 740 741 if (l == 0) { 742 slice_top_w = w; 743 slice_left_h = h; 744 slice_right_h = h; 745 } else if (l == 1) { 746 slice_bottom_w = w; 747 slice_left_h += h; 748 } else if (l == 2) { 749 slice_bottom_w += w; 750 slice_right_h += h; 751 } else { 752 slice_right_h += h; 753 } 754 } 755 756 *phys_slice0_sa = (struct isl_extent2d) { 757 .w = MAX(slice_top_w, slice_bottom_w), 758 .h = MAX(slice_left_h, slice_right_h), 759 }; 760 } 761 762 /** 763 * A variant of isl_calc_phys_slice0_extent_sa() specific to 764 * ISL_DIM_LAYOUT_GEN4_3D. 765 */ 766 static void 767 isl_calc_phys_slice0_extent_sa_gen4_3d( 768 const struct isl_device *dev, 769 const struct isl_surf_init_info *restrict info, 770 const struct isl_extent3d *image_align_sa, 771 const struct isl_extent4d *phys_level0_sa, 772 struct isl_extent2d *phys_slice0_sa) 773 { 774 assert(info->samples == 1); 775 assert(phys_level0_sa->array_len == 1); 776 777 uint32_t slice_w = 0; 778 uint32_t slice_h = 0; 779 780 uint32_t W0 = phys_level0_sa->w; 781 uint32_t H0 = phys_level0_sa->h; 782 uint32_t D0 = phys_level0_sa->d; 783 784 for (uint32_t l = 0; l < info->levels; ++l) { 785 uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); 786 uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); 787 uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d); 788 789 uint32_t max_layers_horiz = MIN(level_d, 1u << l); 790 uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 791 792 slice_w = MAX(slice_w, level_w * max_layers_horiz); 793 slice_h += level_h * max_layers_vert; 794 } 795 796 *phys_slice0_sa = (struct isl_extent2d) { 797 .w = slice_w, 798 .h = slice_h, 799 }; 800 } 801 802 /** 803 * A variant of isl_calc_phys_slice0_extent_sa() specific to 804 * ISL_DIM_LAYOUT_GEN9_1D. 805 */ 806 static void 807 isl_calc_phys_slice0_extent_sa_gen9_1d( 808 const struct isl_device *dev, 809 const struct isl_surf_init_info *restrict info, 810 const struct isl_extent3d *image_align_sa, 811 const struct isl_extent4d *phys_level0_sa, 812 struct isl_extent2d *phys_slice0_sa) 813 { 814 MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 815 816 assert(phys_level0_sa->height == 1); 817 assert(phys_level0_sa->depth == 1); 818 assert(info->samples == 1); 819 assert(image_align_sa->w >= fmtl->bw); 820 821 uint32_t slice_w = 0; 822 const uint32_t W0 = phys_level0_sa->w; 823 824 for (uint32_t l = 0; l < info->levels; ++l) { 825 uint32_t W = isl_minify(W0, l); 826 uint32_t w = isl_align_npot(W, image_align_sa->w); 827 828 slice_w += w; 829 } 830 831 *phys_slice0_sa = isl_extent2d(slice_w, 1); 832 } 833 834 /** 835 * Calculate the physical extent of the surface's first array slice, in units 836 * of surface samples. If the surface is multi-leveled, then the result will 837 * be aligned to \a image_align_sa. 838 */ 839 static void 840 isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, 841 const struct isl_surf_init_info *restrict info, 842 enum isl_dim_layout dim_layout, 843 enum isl_msaa_layout msaa_layout, 844 const struct isl_extent3d *image_align_sa, 845 const struct isl_extent4d *phys_level0_sa, 846 struct isl_extent2d *phys_slice0_sa) 847 { 848 switch (dim_layout) { 849 case ISL_DIM_LAYOUT_GEN9_1D: 850 isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info, 851 image_align_sa, phys_level0_sa, 852 phys_slice0_sa); 853 return; 854 case ISL_DIM_LAYOUT_GEN4_2D: 855 isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, 856 image_align_sa, phys_level0_sa, 857 phys_slice0_sa); 858 return; 859 case ISL_DIM_LAYOUT_GEN4_3D: 860 isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa, 861 phys_level0_sa, phys_slice0_sa); 862 return; 863 } 864 } 865 866 /** 867 * Calculate the pitch between physical array slices, in units of rows of 868 * surface elements. 869 */ 870 static uint32_t 871 isl_calc_array_pitch_el_rows(const struct isl_device *dev, 872 const struct isl_surf_init_info *restrict info, 873 const struct isl_tile_info *tile_info, 874 enum isl_dim_layout dim_layout, 875 enum isl_array_pitch_span array_pitch_span, 876 const struct isl_extent3d *image_align_sa, 877 const struct isl_extent4d *phys_level0_sa, 878 const struct isl_extent2d *phys_slice0_sa) 879 { 880 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 881 uint32_t pitch_sa_rows = 0; 882 883 switch (dim_layout) { 884 case ISL_DIM_LAYOUT_GEN9_1D: 885 /* Each row is an array slice */ 886 pitch_sa_rows = 1; 887 break; 888 case ISL_DIM_LAYOUT_GEN4_2D: 889 switch (array_pitch_span) { 890 case ISL_ARRAY_PITCH_SPAN_COMPACT: 891 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); 892 break; 893 case ISL_ARRAY_PITCH_SPAN_FULL: { 894 /* The QPitch equation is found in the Broadwell PRM >> Volume 5: 895 * Memory Views >> Common Surface Formats >> Surface Layout >> 2D 896 * Surfaces >> Surface Arrays. 897 */ 898 uint32_t H0_sa = phys_level0_sa->h; 899 uint32_t H1_sa = isl_minify(H0_sa, 1); 900 901 uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); 902 uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); 903 904 uint32_t m; 905 if (ISL_DEV_GEN(dev) >= 7) { 906 /* The QPitch equation changed slightly in Ivybridge. */ 907 m = 12; 908 } else { 909 m = 11; 910 } 911 912 pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); 913 914 if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && 915 (info->height % 4 == 1)) { 916 /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: 917 * Graphics Core >> Section 7.18.3.7: Surface Arrays: 918 * 919 * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than 920 * the value calculated in the equation above , for every 921 * other odd Surface Height starting from 1 i.e. 1,5,9,13. 922 * 923 * XXX(chadv): Is the errata natural corollary of the physical 924 * layout of interleaved samples? 925 */ 926 pitch_sa_rows += 4; 927 } 928 929 pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); 930 } /* end case */ 931 break; 932 } 933 break; 934 case ISL_DIM_LAYOUT_GEN4_3D: 935 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); 936 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); 937 break; 938 default: 939 unreachable("bad isl_dim_layout"); 940 break; 941 } 942 943 assert(pitch_sa_rows % fmtl->bh == 0); 944 uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; 945 946 if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) { 947 /* 948 * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632): 949 * 950 * "Mip-mapped and arrayed surfaces are supported with MCS buffer 951 * layout with these alignments in the RT space: Horizontal 952 * Alignment = 128 and Vertical Alignment = 64." 953 * 954 * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435): 955 * 956 * "For non-multisampled render target's CCS auxiliary surface, 957 * QPitch must be computed with Horizontal Alignment = 128 and 958 * Surface Vertical Alignment = 256. These alignments are only for 959 * CCS buffer and not for associated render target." 960 * 961 * The first restriction is already handled by isl_choose_image_alignment_el 962 * but the second restriction, which is an extension of the first, only 963 * applies to qpitch and must be applied here. 964 */ 965 assert(fmtl->bh == 4); 966 pitch_el_rows = isl_align(pitch_el_rows, 256 / 4); 967 } 968 969 if (ISL_DEV_GEN(dev) >= 9 && 970 info->dim == ISL_SURF_DIM_3D && 971 tile_info->tiling != ISL_TILING_LINEAR) { 972 /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: 973 * 974 * Tile Mode != Linear: This field must be set to an integer multiple 975 * of the tile height 976 */ 977 pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height); 978 } 979 980 return pitch_el_rows; 981 } 982 983 /** 984 * Calculate the pitch of each surface row, in bytes. 985 */ 986 static uint32_t 987 isl_calc_linear_row_pitch(const struct isl_device *dev, 988 const struct isl_surf_init_info *restrict info, 989 const struct isl_extent2d *phys_slice0_sa) 990 { 991 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 992 993 uint32_t row_pitch = info->min_pitch; 994 995 /* First, align the surface to a cache line boundary, as the PRM explains 996 * below. 997 * 998 * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface 999 * Formats >> Surface Padding Requirements >> Render Target and Media 1000 * Surfaces: 1001 * 1002 * The data port accesses data (pixels) outside of the surface if they 1003 * are contained in the same cache request as pixels that are within the 1004 * surface. These pixels will not be returned by the requesting message, 1005 * however if these pixels lie outside of defined pages in the GTT, 1006 * a GTT error will result when the cache request is processed. In order 1007 * to avoid these GTT errors, padding at the bottom of the surface is 1008 * sometimes necessary. 1009 * 1010 * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface 1011 * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: 1012 * 1013 * The sampling engine accesses texels outside of the surface if they 1014 * are contained in the same cache line as texels that are within the 1015 * surface. These texels will not participate in any calculation 1016 * performed by the sampling engine and will not affect the result of 1017 * any sampling engine operation, however if these texels lie outside of 1018 * defined pages in the GTT, a GTT error will result when the cache line 1019 * is accessed. In order to avoid these GTT errors, padding at the 1020 * bottom and right side of a sampling engine surface is sometimes 1021 * necessary. 1022 * 1023 * It is possible that a cache line will straddle a page boundary if the 1024 * base address or pitch is not aligned. All pages included in the cache 1025 * lines that are part of the surface must map to valid GTT entries to 1026 * avoid errors. To determine the necessary padding on the bottom and 1027 * right side of the surface, refer to the table in Alignment Unit Size 1028 * section for the i and j parameters for the surface format in use. The 1029 * surface must then be extended to the next multiple of the alignment 1030 * unit size in each dimension, and all texels contained in this 1031 * extended surface must have valid GTT entries. 1032 * 1033 * For example, suppose the surface size is 15 texels by 10 texels and 1034 * the alignment parameters are i=4 and j=2. In this case, the extended 1035 * surface would be 16 by 10. Note that these calculations are done in 1036 * texels, and must be converted to bytes based on the surface format 1037 * being used to determine whether additional pages need to be defined. 1038 */ 1039 assert(phys_slice0_sa->w % fmtl->bw == 0); 1040 const uint32_t bs = fmtl->bpb / 8; 1041 row_pitch = MAX(row_pitch, bs * (phys_slice0_sa->w / fmtl->bw)); 1042 1043 /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> 1044 * RENDER_SURFACE_STATE Surface Pitch (p349): 1045 * 1046 * - For linear render target surfaces and surfaces accessed with the 1047 * typed data port messages, the pitch must be a multiple of the 1048 * element size for non-YUV surface formats. Pitch must be 1049 * a multiple of 2 * element size for YUV surface formats. 1050 * 1051 * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we 1052 * ignore because isl doesn't do buffers.] 1053 * 1054 * - For other linear surfaces, the pitch can be any multiple of 1055 * bytes. 1056 */ 1057 if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 1058 if (isl_format_is_yuv(info->format)) { 1059 row_pitch = isl_align_npot(row_pitch, 2 * bs); 1060 } else { 1061 row_pitch = isl_align_npot(row_pitch, bs); 1062 } 1063 } 1064 1065 return row_pitch; 1066 } 1067 1068 /** 1069 * Calculate and apply any padding required for the surface. 1070 * 1071 * @param[inout] total_h_el is updated with the new height 1072 * @param[out] pad_bytes is overwritten with additional padding requirements. 1073 */ 1074 static void 1075 isl_apply_surface_padding(const struct isl_device *dev, 1076 const struct isl_surf_init_info *restrict info, 1077 const struct isl_tile_info *tile_info, 1078 uint32_t *total_h_el, 1079 uint32_t *pad_bytes) 1080 { 1081 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1082 1083 *pad_bytes = 0; 1084 1085 /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface 1086 * Formats >> Surface Padding Requirements >> Render Target and Media 1087 * Surfaces: 1088 * 1089 * The data port accesses data (pixels) outside of the surface if they 1090 * are contained in the same cache request as pixels that are within the 1091 * surface. These pixels will not be returned by the requesting message, 1092 * however if these pixels lie outside of defined pages in the GTT, 1093 * a GTT error will result when the cache request is processed. In 1094 * order to avoid these GTT errors, padding at the bottom of the 1095 * surface is sometimes necessary. 1096 * 1097 * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface 1098 * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: 1099 * 1100 * ... Lots of padding requirements, all listed separately below. 1101 */ 1102 1103 /* We can safely ignore the first padding requirement, quoted below, 1104 * because isl doesn't do buffers. 1105 * 1106 * - [pre-BDW] For buffers, which have no inherent height, padding 1107 * requirements are different. A buffer must be padded to the next 1108 * multiple of 256 array elements, with an additional 16 bytes added 1109 * beyond that to account for the L1 cache line. 1110 */ 1111 1112 /* 1113 * - For compressed textures [...], padding at the bottom of the surface 1114 * is to an even compressed row. 1115 */ 1116 if (isl_format_is_compressed(info->format)) 1117 *total_h_el = isl_align(*total_h_el, 2); 1118 1119 /* 1120 * - For cube surfaces, an additional two rows of padding are required 1121 * at the bottom of the surface. 1122 */ 1123 if (info->usage & ISL_SURF_USAGE_CUBE_BIT) 1124 *total_h_el += 2; 1125 1126 /* 1127 * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, 1128 * additional padding is required. These surfaces require an extra row 1129 * plus 16 bytes of padding at the bottom in addition to the general 1130 * padding requirements. 1131 */ 1132 if (isl_format_is_yuv(info->format) && 1133 (fmtl->bpb == 96 || fmtl->bpb == 48|| fmtl->bpb == 24)) { 1134 *total_h_el += 1; 1135 *pad_bytes += 16; 1136 } 1137 1138 /* 1139 * - For linear surfaces, additional padding of 64 bytes is required at 1140 * the bottom of the surface. This is in addition to the padding 1141 * required above. 1142 */ 1143 if (tile_info->tiling == ISL_TILING_LINEAR) 1144 *pad_bytes += 64; 1145 1146 /* The below text weakens, not strengthens, the padding requirements for 1147 * linear surfaces. Therefore we can safely ignore it. 1148 * 1149 * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array, 1150 * non-MSAA, non-mip-mapped surfaces in linear memory, the only 1151 * padding requirement is to the next aligned 64-byte boundary beyond 1152 * the end of the surface. The rest of the padding requirements 1153 * documented above do not apply to these surfaces. 1154 */ 1155 1156 /* 1157 * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and 1158 * height % 4 != 0, the surface must be padded with 1159 * 4-(height % 4)*Surface Pitch # of bytes. 1160 */ 1161 if (ISL_DEV_GEN(dev) >= 9 && 1162 tile_info->tiling == ISL_TILING_LINEAR && 1163 (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) { 1164 *total_h_el = isl_align(*total_h_el, 4); 1165 } 1166 1167 /* 1168 * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded 1169 * to 4 times the Surface Pitch # of bytes 1170 */ 1171 if (ISL_DEV_GEN(dev) >= 9 && 1172 tile_info->tiling == ISL_TILING_LINEAR && 1173 info->dim == ISL_SURF_DIM_1D) { 1174 *total_h_el += 4; 1175 } 1176 } 1177 1178 bool 1179 isl_surf_init_s(const struct isl_device *dev, 1180 struct isl_surf *surf, 1181 const struct isl_surf_init_info *restrict info) 1182 { 1183 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); 1184 1185 const struct isl_extent4d logical_level0_px = { 1186 .w = info->width, 1187 .h = info->height, 1188 .d = info->depth, 1189 .a = info->array_len, 1190 }; 1191 1192 enum isl_tiling tiling; 1193 if (!isl_surf_choose_tiling(dev, info, &tiling)) 1194 return false; 1195 1196 struct isl_tile_info tile_info; 1197 if (!isl_tiling_get_info(dev, tiling, fmtl->bpb, &tile_info)) 1198 return false; 1199 1200 const enum isl_dim_layout dim_layout = 1201 isl_surf_choose_dim_layout(dev, info->dim, tiling); 1202 1203 enum isl_msaa_layout msaa_layout; 1204 if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) 1205 return false; 1206 1207 struct isl_extent3d image_align_el; 1208 isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout, 1209 &image_align_el); 1210 1211 struct isl_extent3d image_align_sa = 1212 isl_extent3d_el_to_sa(info->format, image_align_el); 1213 1214 struct isl_extent4d phys_level0_sa; 1215 isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, 1216 &phys_level0_sa); 1217 assert(phys_level0_sa.w % fmtl->bw == 0); 1218 assert(phys_level0_sa.h % fmtl->bh == 0); 1219 1220 enum isl_array_pitch_span array_pitch_span = 1221 isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); 1222 1223 struct isl_extent2d phys_slice0_sa; 1224 isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout, 1225 &image_align_sa, &phys_level0_sa, 1226 &phys_slice0_sa); 1227 assert(phys_slice0_sa.w % fmtl->bw == 0); 1228 assert(phys_slice0_sa.h % fmtl->bh == 0); 1229 1230 const uint32_t array_pitch_el_rows = 1231 isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout, 1232 array_pitch_span, &image_align_sa, 1233 &phys_level0_sa, &phys_slice0_sa); 1234 1235 uint32_t total_h_el = phys_level0_sa.array_len * array_pitch_el_rows; 1236 1237 uint32_t pad_bytes; 1238 isl_apply_surface_padding(dev, info, &tile_info, &total_h_el, &pad_bytes); 1239 1240 uint32_t row_pitch, size, base_alignment; 1241 if (tiling == ISL_TILING_LINEAR) { 1242 row_pitch = isl_calc_linear_row_pitch(dev, info, &phys_slice0_sa); 1243 size = row_pitch * total_h_el + pad_bytes; 1244 1245 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress: 1246 * 1247 * "The Base Address for linear render target surfaces and surfaces 1248 * accessed with the typed surface read/write data port messages must 1249 * be element-size aligned, for non-YUV surface formats, or a 1250 * multiple of 2 element-sizes for YUV surface formats. Other linear 1251 * surfaces have no alignment requirements (byte alignment is 1252 * sufficient.)" 1253 */ 1254 base_alignment = MAX(1, info->min_alignment); 1255 if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 1256 if (isl_format_is_yuv(info->format)) { 1257 base_alignment = MAX(base_alignment, fmtl->bpb / 4); 1258 } else { 1259 base_alignment = MAX(base_alignment, fmtl->bpb / 8); 1260 } 1261 } 1262 base_alignment = isl_round_up_to_power_of_two(base_alignment); 1263 } else { 1264 assert(fmtl->bpb % tile_info.format_bpb == 0); 1265 const uint32_t tile_el_scale = fmtl->bpb / tile_info.format_bpb; 1266 1267 assert(phys_slice0_sa.w % fmtl->bw == 0); 1268 const uint32_t total_w_el = phys_slice0_sa.width / fmtl->bw; 1269 const uint32_t total_w_tl = 1270 isl_align_div(total_w_el * tile_el_scale, 1271 tile_info.logical_extent_el.width); 1272 1273 row_pitch = total_w_tl * tile_info.phys_extent_B.width; 1274 if (row_pitch < info->min_pitch) { 1275 row_pitch = isl_align_npot(info->min_pitch, 1276 tile_info.phys_extent_B.width); 1277 } 1278 1279 total_h_el += isl_align_div_npot(pad_bytes, row_pitch); 1280 const uint32_t total_h_tl = 1281 isl_align_div(total_h_el, tile_info.logical_extent_el.height); 1282 1283 size = total_h_tl * tile_info.phys_extent_B.height * row_pitch; 1284 1285 const uint32_t tile_size = tile_info.phys_extent_B.width * 1286 tile_info.phys_extent_B.height; 1287 assert(isl_is_pow2(info->min_alignment) && isl_is_pow2(tile_size)); 1288 base_alignment = MAX(info->min_alignment, tile_size); 1289 } 1290 1291 *surf = (struct isl_surf) { 1292 .dim = info->dim, 1293 .dim_layout = dim_layout, 1294 .msaa_layout = msaa_layout, 1295 .tiling = tiling, 1296 .format = info->format, 1297 1298 .levels = info->levels, 1299 .samples = info->samples, 1300 1301 .image_alignment_el = image_align_el, 1302 .logical_level0_px = logical_level0_px, 1303 .phys_level0_sa = phys_level0_sa, 1304 1305 .size = size, 1306 .alignment = base_alignment, 1307 .row_pitch = row_pitch, 1308 .array_pitch_el_rows = array_pitch_el_rows, 1309 .array_pitch_span = array_pitch_span, 1310 1311 .usage = info->usage, 1312 }; 1313 1314 return true; 1315 } 1316 1317 void 1318 isl_surf_get_tile_info(const struct isl_device *dev, 1319 const struct isl_surf *surf, 1320 struct isl_tile_info *tile_info) 1321 { 1322 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 1323 isl_tiling_get_info(dev, surf->tiling, fmtl->bpb, tile_info); 1324 } 1325 1326 void 1327 isl_surf_get_hiz_surf(const struct isl_device *dev, 1328 const struct isl_surf *surf, 1329 struct isl_surf *hiz_surf) 1330 { 1331 assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev)); 1332 1333 /* Multisampled depth is always interleaved */ 1334 assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE || 1335 surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED); 1336 1337 /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer": 1338 * 1339 * "The Surface Type, Height, Width, Depth, Minimum Array Element, Render 1340 * Target View Extent, and Depth Coordinate Offset X/Y of the 1341 * hierarchical depth buffer are inherited from the depth buffer. The 1342 * height and width of the hierarchical depth buffer that must be 1343 * allocated are computed by the following formulas, where HZ is the 1344 * hierarchical depth buffer and Z is the depth buffer. The Z_Height, 1345 * Z_Width, and Z_Depth values given in these formulas are those present 1346 * in 3DSTATE_DEPTH_BUFFER incremented by one. 1347 * 1348 * "The value of Z_Height and Z_Width must each be multiplied by 2 before 1349 * being applied to the table below if Number of Multisamples is set to 1350 * NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and 1351 * Z_Width must be multiplied by 4 before being applied to the table 1352 * below if Number of Multisamples is set to NUMSAMPLES_8." 1353 * 1354 * In the Sky Lake PRM, the second paragraph is replaced with this: 1355 * 1356 * "The Z_Height and Z_Width values must equal those present in 1357 * 3DSTATE_DEPTH_BUFFER incremented by one." 1358 * 1359 * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ 1360 * block corresponds to a region of 8x4 samples in the primary depth 1361 * surface. On Sky Lake, on the other hand, each HiZ block corresponds to 1362 * a region of 8x4 pixels in the primary depth surface regardless of the 1363 * number of samples. The dimensions of a HiZ block in both pixels and 1364 * samples are given in the table below: 1365 * 1366 * | SNB - BDW | SKL+ 1367 * ------+-----------+------------- 1368 * 1x | 8 x 4 sa | 8 x 4 sa 1369 * MSAA | 8 x 4 px | 8 x 4 px 1370 * ------+-----------+------------- 1371 * 2x | 8 x 4 sa | 16 x 4 sa 1372 * MSAA | 4 x 4 px | 8 x 4 px 1373 * ------+-----------+------------- 1374 * 4x | 8 x 4 sa | 16 x 8 sa 1375 * MSAA | 4 x 2 px | 8 x 4 px 1376 * ------+-----------+------------- 1377 * 8x | 8 x 4 sa | 32 x 8 sa 1378 * MSAA | 2 x 2 px | 8 x 4 px 1379 * ------+-----------+------------- 1380 * 16x | N/A | 32 x 16 sa 1381 * MSAA | N/A | 8 x 4 px 1382 * ------+-----------+------------- 1383 * 1384 * There are a number of different ways that this discrepency could be 1385 * handled. The way we have chosen is to simply make MSAA HiZ have the 1386 * same number of samples as the parent surface pre-Sky Lake and always be 1387 * single-sampled on Sky Lake and above. Since the block sizes of 1388 * compressed formats are given in samples, this neatly handles everything 1389 * without the need for additional HiZ formats with different block sizes 1390 * on SKL+. 1391 */ 1392 const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples; 1393 1394 isl_surf_init(dev, hiz_surf, 1395 .dim = surf->dim, 1396 .format = ISL_FORMAT_HIZ, 1397 .width = surf->logical_level0_px.width, 1398 .height = surf->logical_level0_px.height, 1399 .depth = surf->logical_level0_px.depth, 1400 .levels = surf->levels, 1401 .array_len = surf->logical_level0_px.array_len, 1402 .samples = samples, 1403 .usage = ISL_SURF_USAGE_HIZ_BIT, 1404 .tiling_flags = ISL_TILING_HIZ_BIT); 1405 } 1406 1407 void 1408 isl_surf_get_mcs_surf(const struct isl_device *dev, 1409 const struct isl_surf *surf, 1410 struct isl_surf *mcs_surf) 1411 { 1412 /* It must be multisampled with an array layout */ 1413 assert(surf->samples > 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY); 1414 1415 /* The following are true of all multisampled surfaces */ 1416 assert(surf->dim == ISL_SURF_DIM_2D); 1417 assert(surf->levels == 1); 1418 assert(surf->logical_level0_px.depth == 1); 1419 1420 enum isl_format mcs_format; 1421 switch (surf->samples) { 1422 case 2: mcs_format = ISL_FORMAT_MCS_2X; break; 1423 case 4: mcs_format = ISL_FORMAT_MCS_4X; break; 1424 case 8: mcs_format = ISL_FORMAT_MCS_8X; break; 1425 case 16: mcs_format = ISL_FORMAT_MCS_16X; break; 1426 default: 1427 unreachable("Invalid sample count"); 1428 } 1429 1430 isl_surf_init(dev, mcs_surf, 1431 .dim = ISL_SURF_DIM_2D, 1432 .format = mcs_format, 1433 .width = surf->logical_level0_px.width, 1434 .height = surf->logical_level0_px.height, 1435 .depth = 1, 1436 .levels = 1, 1437 .array_len = surf->logical_level0_px.array_len, 1438 .samples = 1, /* MCS surfaces are really single-sampled */ 1439 .usage = ISL_SURF_USAGE_MCS_BIT, 1440 .tiling_flags = ISL_TILING_Y0_BIT); 1441 } 1442 1443 bool 1444 isl_surf_get_ccs_surf(const struct isl_device *dev, 1445 const struct isl_surf *surf, 1446 struct isl_surf *ccs_surf) 1447 { 1448 assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE); 1449 assert(ISL_DEV_GEN(dev) >= 7); 1450 1451 if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) 1452 return false; 1453 1454 if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D) 1455 return false; 1456 1457 if (isl_format_is_compressed(surf->format)) 1458 return false; 1459 1460 /* TODO: More conditions where it can fail. */ 1461 1462 enum isl_format ccs_format; 1463 if (ISL_DEV_GEN(dev) >= 9) { 1464 if (!isl_tiling_is_any_y(surf->tiling)) 1465 return false; 1466 1467 switch (isl_format_get_layout(surf->format)->bpb) { 1468 case 32: ccs_format = ISL_FORMAT_GEN9_CCS_32BPP; break; 1469 case 64: ccs_format = ISL_FORMAT_GEN9_CCS_64BPP; break; 1470 case 128: ccs_format = ISL_FORMAT_GEN9_CCS_128BPP; break; 1471 default: 1472 return false; 1473 } 1474 } else if (surf->tiling == ISL_TILING_Y0) { 1475 switch (isl_format_get_layout(surf->format)->bpb) { 1476 case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y; break; 1477 case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y; break; 1478 case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y; break; 1479 default: 1480 return false; 1481 } 1482 } else if (surf->tiling == ISL_TILING_X) { 1483 switch (isl_format_get_layout(surf->format)->bpb) { 1484 case 32: ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X; break; 1485 case 64: ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X; break; 1486 case 128: ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X; break; 1487 default: 1488 return false; 1489 } 1490 } else { 1491 return false; 1492 } 1493 1494 isl_surf_init(dev, ccs_surf, 1495 .dim = surf->dim, 1496 .format = ccs_format, 1497 .width = surf->logical_level0_px.width, 1498 .height = surf->logical_level0_px.height, 1499 .depth = surf->logical_level0_px.depth, 1500 .levels = surf->levels, 1501 .array_len = surf->logical_level0_px.array_len, 1502 .samples = 1, 1503 .usage = ISL_SURF_USAGE_CCS_BIT, 1504 .tiling_flags = ISL_TILING_CCS_BIT); 1505 1506 return true; 1507 } 1508 1509 void 1510 isl_surf_fill_state_s(const struct isl_device *dev, void *state, 1511 const struct isl_surf_fill_state_info *restrict info) 1512 { 1513 #ifndef NDEBUG 1514 isl_surf_usage_flags_t _base_usage = 1515 info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 1516 ISL_SURF_USAGE_TEXTURE_BIT | 1517 ISL_SURF_USAGE_STORAGE_BIT); 1518 /* They may only specify one of the above bits at a time */ 1519 assert(__builtin_popcount(_base_usage) == 1); 1520 /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */ 1521 assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage); 1522 #endif 1523 1524 if (info->surf->dim == ISL_SURF_DIM_3D) { 1525 assert(info->view->base_array_layer + info->view->array_len <= 1526 info->surf->logical_level0_px.depth); 1527 } else { 1528 assert(info->view->base_array_layer + info->view->array_len <= 1529 info->surf->logical_level0_px.array_len); 1530 } 1531 1532 switch (ISL_DEV_GEN(dev)) { 1533 case 4: 1534 if (ISL_DEV_IS_G4X(dev)) { 1535 /* G45 surface state is the same as gen5 */ 1536 isl_gen5_surf_fill_state_s(dev, state, info); 1537 } else { 1538 isl_gen4_surf_fill_state_s(dev, state, info); 1539 } 1540 break; 1541 case 5: 1542 isl_gen5_surf_fill_state_s(dev, state, info); 1543 break; 1544 case 6: 1545 isl_gen6_surf_fill_state_s(dev, state, info); 1546 break; 1547 case 7: 1548 if (ISL_DEV_IS_HASWELL(dev)) { 1549 isl_gen75_surf_fill_state_s(dev, state, info); 1550 } else { 1551 isl_gen7_surf_fill_state_s(dev, state, info); 1552 } 1553 break; 1554 case 8: 1555 isl_gen8_surf_fill_state_s(dev, state, info); 1556 break; 1557 case 9: 1558 isl_gen9_surf_fill_state_s(dev, state, info); 1559 break; 1560 default: 1561 assert(!"Cannot fill surface state for this gen"); 1562 } 1563 } 1564 1565 void 1566 isl_buffer_fill_state_s(const struct isl_device *dev, void *state, 1567 const struct isl_buffer_fill_state_info *restrict info) 1568 { 1569 switch (ISL_DEV_GEN(dev)) { 1570 case 4: 1571 case 5: 1572 /* Gen 4-5 are all the same when it comes to buffer surfaces */ 1573 isl_gen5_buffer_fill_state_s(state, info); 1574 break; 1575 case 6: 1576 isl_gen6_buffer_fill_state_s(state, info); 1577 break; 1578 case 7: 1579 if (ISL_DEV_IS_HASWELL(dev)) { 1580 isl_gen75_buffer_fill_state_s(state, info); 1581 } else { 1582 isl_gen7_buffer_fill_state_s(state, info); 1583 } 1584 break; 1585 case 8: 1586 isl_gen8_buffer_fill_state_s(state, info); 1587 break; 1588 case 9: 1589 isl_gen9_buffer_fill_state_s(state, info); 1590 break; 1591 default: 1592 assert(!"Cannot fill surface state for this gen"); 1593 } 1594 } 1595 1596 /** 1597 * A variant of isl_surf_get_image_offset_sa() specific to 1598 * ISL_DIM_LAYOUT_GEN4_2D. 1599 */ 1600 static void 1601 get_image_offset_sa_gen4_2d(const struct isl_surf *surf, 1602 uint32_t level, uint32_t logical_array_layer, 1603 uint32_t *x_offset_sa, 1604 uint32_t *y_offset_sa) 1605 { 1606 assert(level < surf->levels); 1607 if (surf->dim == ISL_SURF_DIM_3D) 1608 assert(logical_array_layer < surf->logical_level0_px.depth); 1609 else 1610 assert(logical_array_layer < surf->logical_level0_px.array_len); 1611 1612 const struct isl_extent3d image_align_sa = 1613 isl_surf_get_image_alignment_sa(surf); 1614 1615 const uint32_t W0 = surf->phys_level0_sa.width; 1616 const uint32_t H0 = surf->phys_level0_sa.height; 1617 1618 const uint32_t phys_layer = logical_array_layer * 1619 (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1); 1620 1621 uint32_t x = 0; 1622 uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf); 1623 1624 for (uint32_t l = 0; l < level; ++l) { 1625 if (l == 1) { 1626 uint32_t W = isl_minify(W0, l); 1627 x += isl_align_npot(W, image_align_sa.w); 1628 } else { 1629 uint32_t H = isl_minify(H0, l); 1630 y += isl_align_npot(H, image_align_sa.h); 1631 } 1632 } 1633 1634 *x_offset_sa = x; 1635 *y_offset_sa = y; 1636 } 1637 1638 /** 1639 * A variant of isl_surf_get_image_offset_sa() specific to 1640 * ISL_DIM_LAYOUT_GEN4_3D. 1641 */ 1642 static void 1643 get_image_offset_sa_gen4_3d(const struct isl_surf *surf, 1644 uint32_t level, uint32_t logical_z_offset_px, 1645 uint32_t *x_offset_sa, 1646 uint32_t *y_offset_sa) 1647 { 1648 assert(level < surf->levels); 1649 assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); 1650 assert(surf->phys_level0_sa.array_len == 1); 1651 1652 const struct isl_extent3d image_align_sa = 1653 isl_surf_get_image_alignment_sa(surf); 1654 1655 const uint32_t W0 = surf->phys_level0_sa.width; 1656 const uint32_t H0 = surf->phys_level0_sa.height; 1657 const uint32_t D0 = surf->phys_level0_sa.depth; 1658 1659 uint32_t x = 0; 1660 uint32_t y = 0; 1661 1662 for (uint32_t l = 0; l < level; ++l) { 1663 const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); 1664 const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d); 1665 const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); 1666 1667 y += level_h * max_layers_vert; 1668 } 1669 1670 const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); 1671 const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); 1672 const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d); 1673 1674 const uint32_t max_layers_horiz = MIN(level_d, 1u << level); 1675 1676 x += level_w * (logical_z_offset_px % max_layers_horiz); 1677 y += level_h * (logical_z_offset_px / max_layers_horiz); 1678 1679 *x_offset_sa = x; 1680 *y_offset_sa = y; 1681 } 1682 1683 /** 1684 * A variant of isl_surf_get_image_offset_sa() specific to 1685 * ISL_DIM_LAYOUT_GEN9_1D. 1686 */ 1687 static void 1688 get_image_offset_sa_gen9_1d(const struct isl_surf *surf, 1689 uint32_t level, uint32_t layer, 1690 uint32_t *x_offset_sa, 1691 uint32_t *y_offset_sa) 1692 { 1693 assert(level < surf->levels); 1694 assert(layer < surf->phys_level0_sa.array_len); 1695 assert(surf->phys_level0_sa.height == 1); 1696 assert(surf->phys_level0_sa.depth == 1); 1697 assert(surf->samples == 1); 1698 1699 const uint32_t W0 = surf->phys_level0_sa.width; 1700 const struct isl_extent3d image_align_sa = 1701 isl_surf_get_image_alignment_sa(surf); 1702 1703 uint32_t x = 0; 1704 1705 for (uint32_t l = 0; l < level; ++l) { 1706 uint32_t W = isl_minify(W0, l); 1707 uint32_t w = isl_align_npot(W, image_align_sa.w); 1708 1709 x += w; 1710 } 1711 1712 *x_offset_sa = x; 1713 *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); 1714 } 1715 1716 /** 1717 * Calculate the offset, in units of surface samples, to a subimage in the 1718 * surface. 1719 * 1720 * @invariant level < surface levels 1721 * @invariant logical_array_layer < logical array length of surface 1722 * @invariant logical_z_offset_px < logical depth of surface at level 1723 */ 1724 void 1725 isl_surf_get_image_offset_sa(const struct isl_surf *surf, 1726 uint32_t level, 1727 uint32_t logical_array_layer, 1728 uint32_t logical_z_offset_px, 1729 uint32_t *x_offset_sa, 1730 uint32_t *y_offset_sa) 1731 { 1732 assert(level < surf->levels); 1733 assert(logical_array_layer < surf->logical_level0_px.array_len); 1734 assert(logical_z_offset_px 1735 < isl_minify(surf->logical_level0_px.depth, level)); 1736 1737 switch (surf->dim_layout) { 1738 case ISL_DIM_LAYOUT_GEN9_1D: 1739 get_image_offset_sa_gen9_1d(surf, level, logical_array_layer, 1740 x_offset_sa, y_offset_sa); 1741 break; 1742 case ISL_DIM_LAYOUT_GEN4_2D: 1743 get_image_offset_sa_gen4_2d(surf, level, logical_array_layer 1744 + logical_z_offset_px, 1745 x_offset_sa, y_offset_sa); 1746 break; 1747 case ISL_DIM_LAYOUT_GEN4_3D: 1748 get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px, 1749 x_offset_sa, y_offset_sa); 1750 break; 1751 1752 default: 1753 unreachable("not reached"); 1754 } 1755 } 1756 1757 void 1758 isl_surf_get_image_offset_el(const struct isl_surf *surf, 1759 uint32_t level, 1760 uint32_t logical_array_layer, 1761 uint32_t logical_z_offset_px, 1762 uint32_t *x_offset_el, 1763 uint32_t *y_offset_el) 1764 { 1765 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); 1766 1767 assert(level < surf->levels); 1768 assert(logical_array_layer < surf->logical_level0_px.array_len); 1769 assert(logical_z_offset_px 1770 < isl_minify(surf->logical_level0_px.depth, level)); 1771 1772 uint32_t x_offset_sa, y_offset_sa; 1773 isl_surf_get_image_offset_sa(surf, level, 1774 logical_array_layer, 1775 logical_z_offset_px, 1776 &x_offset_sa, 1777 &y_offset_sa); 1778 1779 *x_offset_el = x_offset_sa / fmtl->bw; 1780 *y_offset_el = y_offset_sa / fmtl->bh; 1781 } 1782 1783 void 1784 isl_tiling_get_intratile_offset_el(const struct isl_device *dev, 1785 enum isl_tiling tiling, 1786 uint8_t bs, 1787 uint32_t row_pitch, 1788 uint32_t total_x_offset_el, 1789 uint32_t total_y_offset_el, 1790 uint32_t *base_address_offset, 1791 uint32_t *x_offset_el, 1792 uint32_t *y_offset_el) 1793 { 1794 if (tiling == ISL_TILING_LINEAR) { 1795 *base_address_offset = total_y_offset_el * row_pitch + 1796 total_x_offset_el * bs; 1797 *x_offset_el = 0; 1798 *y_offset_el = 0; 1799 return; 1800 } 1801 1802 const uint32_t bpb = bs * 8; 1803 1804 struct isl_tile_info tile_info; 1805 isl_tiling_get_info(dev, tiling, bpb, &tile_info); 1806 1807 assert(row_pitch % tile_info.phys_extent_B.width == 0); 1808 1809 /* For non-power-of-two formats, we need the address to be both tile and 1810 * element-aligned. The easiest way to achieve this is to work with a tile 1811 * that is three times as wide as the regular tile. 1812 * 1813 * The tile info returned by get_tile_info has a logical size that is an 1814 * integer number of tile_info.format_bpb size elements. To scale the 1815 * tile, we scale up the physical width and then treat the logical tile 1816 * size as if it has bpb size elements. 1817 */ 1818 const uint32_t tile_el_scale = bpb / tile_info.format_bpb; 1819 tile_info.phys_extent_B.width *= tile_el_scale; 1820 1821 /* Compute the offset into the tile */ 1822 *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w; 1823 *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h; 1824 1825 /* Compute the offset of the tile in units of whole tiles */ 1826 uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w; 1827 uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h; 1828 1829 *base_address_offset = 1830 y_offset_tl * tile_info.phys_extent_B.h * row_pitch + 1831 x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w; 1832 } 1833 1834 uint32_t 1835 isl_surf_get_depth_format(const struct isl_device *dev, 1836 const struct isl_surf *surf) 1837 { 1838 /* Support for separate stencil buffers began in gen5. Support for 1839 * interleaved depthstencil buffers ceased in gen7. The intermediate gens, 1840 * those that supported separate and interleaved stencil, were gen5 and 1841 * gen6. 1842 * 1843 * For a list of all available formats, see the Sandybridge PRM >> Volume 1844 * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface 1845 * Format (p321). 1846 */ 1847 1848 bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT; 1849 1850 assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT); 1851 1852 if (has_stencil) 1853 assert(ISL_DEV_GEN(dev) < 7); 1854 1855 switch (surf->format) { 1856 default: 1857 unreachable("bad isl depth format"); 1858 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS: 1859 assert(ISL_DEV_GEN(dev) < 7); 1860 return 0; /* D32_FLOAT_S8X24_UINT */ 1861 case ISL_FORMAT_R32_FLOAT: 1862 assert(!has_stencil); 1863 return 1; /* D32_FLOAT */ 1864 case ISL_FORMAT_R24_UNORM_X8_TYPELESS: 1865 if (has_stencil) { 1866 assert(ISL_DEV_GEN(dev) < 7); 1867 return 2; /* D24_UNORM_S8_UINT */ 1868 } else { 1869 assert(ISL_DEV_GEN(dev) >= 5); 1870 return 3; /* D24_UNORM_X8_UINT */ 1871 } 1872 case ISL_FORMAT_R16_UNORM: 1873 assert(!has_stencil); 1874 return 5; /* D16_UNORM */ 1875 } 1876 } 1877