1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 2014 LunarG, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Chia-I Wu <olv (at) lunarg.com> 26 */ 27 28 #include "ilo_debug.h" 29 #include "ilo_image.h" 30 31 enum { 32 IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE, 33 IMAGE_TILING_X = 1 << GEN6_TILING_X, 34 IMAGE_TILING_Y = 1 << GEN6_TILING_Y, 35 IMAGE_TILING_W = 1 << GEN8_TILING_W, 36 37 IMAGE_TILING_ALL = (IMAGE_TILING_NONE | 38 IMAGE_TILING_X | 39 IMAGE_TILING_Y | 40 IMAGE_TILING_W) 41 }; 42 43 struct ilo_image_layout { 44 enum ilo_image_walk_type walk; 45 bool interleaved_samples; 46 47 uint8_t valid_tilings; 48 enum gen_surface_tiling tiling; 49 50 enum ilo_image_aux_type aux; 51 52 int align_i; 53 int align_j; 54 55 struct ilo_image_lod *lods; 56 int walk_layer_h0; 57 int walk_layer_h1; 58 int walk_layer_height; 59 int monolithic_width; 60 int monolithic_height; 61 }; 62 63 static enum ilo_image_walk_type 64 image_get_gen6_walk(const struct ilo_dev *dev, 65 const struct ilo_image_info *info) 66 { 67 ILO_DEV_ASSERT(dev, 6, 6); 68 69 /* TODO we want LODs to be page-aligned */ 70 if (info->type == GEN6_SURFTYPE_3D) 71 return ILO_IMAGE_WALK_3D; 72 73 /* 74 * From the Sandy Bridge PRM, volume 1 part 1, page 115: 75 * 76 * "The separate stencil buffer does not support mip mapping, thus the 77 * storage for LODs other than LOD 0 is not needed. The following 78 * QPitch equation applies only to the separate stencil buffer: 79 * 80 * QPitch = h_0" 81 * 82 * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels 83 * when bound. 84 */ 85 if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT) 86 return ILO_IMAGE_WALK_LOD; 87 88 /* compact spacing is not supported otherwise */ 89 return ILO_IMAGE_WALK_LAYER; 90 } 91 92 static enum ilo_image_walk_type 93 image_get_gen7_walk(const struct ilo_dev *dev, 94 const struct ilo_image_info *info) 95 { 96 ILO_DEV_ASSERT(dev, 7, 8); 97 98 if (info->type == GEN6_SURFTYPE_3D) 99 return ILO_IMAGE_WALK_3D; 100 101 /* 102 * From the Ivy Bridge PRM, volume 1 part 1, page 111: 103 * 104 * "note that the depth buffer and stencil buffer have an implied value 105 * of ARYSPC_FULL" 106 * 107 * From the Ivy Bridge PRM, volume 4 part 1, page 66: 108 * 109 * "If Multisampled Surface Storage Format is MSFMT_MSS and Number of 110 * Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array 111 * Spacing) must be set to ARYSPC_LOD0." 112 */ 113 if (info->sample_count > 1) 114 assert(info->level_count == 1); 115 return (info->bind_zs || info->level_count > 1) ? 116 ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD; 117 } 118 119 static bool 120 image_get_gen6_interleaved_samples(const struct ilo_dev *dev, 121 const struct ilo_image_info *info) 122 { 123 ILO_DEV_ASSERT(dev, 6, 8); 124 125 /* 126 * Gen6 supports only interleaved samples. It is not explicitly stated, 127 * but on Gen7+, render targets are expected to be UMS/CMS (samples 128 * non-interleaved) and depth/stencil buffers are expected to be IMS 129 * (samples interleaved). 130 * 131 * See "Multisampled Surface Storage Format" field of SURFACE_STATE. 132 */ 133 return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs); 134 } 135 136 static uint8_t 137 image_get_gen6_valid_tilings(const struct ilo_dev *dev, 138 const struct ilo_image_info *info) 139 { 140 uint8_t valid_tilings = IMAGE_TILING_ALL; 141 142 ILO_DEV_ASSERT(dev, 6, 8); 143 144 if (info->valid_tilings) 145 valid_tilings &= info->valid_tilings; 146 147 /* 148 * From the Sandy Bridge PRM, volume 1 part 2, page 32: 149 * 150 * "Display/Overlay Y-Major not supported. 151 * X-Major required for Async Flips" 152 */ 153 if (unlikely(info->bind_scanout)) 154 valid_tilings &= IMAGE_TILING_X; 155 156 /* 157 * From the Sandy Bridge PRM, volume 3 part 2, page 158: 158 * 159 * "The cursor surface address must be 4K byte aligned. The cursor must 160 * be in linear memory, it cannot be tiled." 161 */ 162 if (unlikely(info->bind_cursor)) 163 valid_tilings &= IMAGE_TILING_NONE; 164 165 /* 166 * From the Sandy Bridge PRM, volume 2 part 1, page 318: 167 * 168 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear 169 * Depth Buffer is not supported." 170 * 171 * "The Depth Buffer, if tiled, must use Y-Major tiling." 172 * 173 * From the Sandy Bridge PRM, volume 1 part 2, page 22: 174 * 175 * "W-Major Tile Format is used for separate stencil." 176 */ 177 if (info->bind_zs) { 178 if (info->format == GEN6_FORMAT_R8_UINT) 179 valid_tilings &= IMAGE_TILING_W; 180 else 181 valid_tilings &= IMAGE_TILING_Y; 182 } 183 184 if (info->bind_surface_sampler || 185 info->bind_surface_dp_render || 186 info->bind_surface_dp_typed) { 187 /* 188 * From the Haswell PRM, volume 2d, page 233: 189 * 190 * "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field 191 * (Tiled Surface) must be TRUE." 192 */ 193 if (info->sample_count > 1) 194 valid_tilings &= ~IMAGE_TILING_NONE; 195 196 if (ilo_dev_gen(dev) < ILO_GEN(8)) 197 valid_tilings &= ~IMAGE_TILING_W; 198 } 199 200 if (info->bind_surface_dp_render) { 201 /* 202 * From the Sandy Bridge PRM, volume 1 part 2, page 32: 203 * 204 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be 205 * either TileX or Linear." 206 * 207 * From the Haswell PRM, volume 5, page 32: 208 * 209 * "NOTE: 128 BPP format color buffer (render target) supports 210 * Linear, TiledX and TiledY." 211 */ 212 if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->block_size == 16) 213 valid_tilings &= ~IMAGE_TILING_Y; 214 215 /* 216 * From the Ivy Bridge PRM, volume 4 part 1, page 63: 217 * 218 * "This field (Surface Vertical Aligment) must be set to VALIGN_4 219 * for all tiled Y Render Target surfaces." 220 * 221 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT." 222 * 223 * R32G32B32_FLOAT is not renderable and we only need an assert() here. 224 */ 225 if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5)) 226 assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT); 227 } 228 229 return valid_tilings; 230 } 231 232 static uint64_t 233 image_get_gen6_estimated_size(const struct ilo_dev *dev, 234 const struct ilo_image_info *info) 235 { 236 /* padding not considered */ 237 const uint64_t slice_size = info->width * info->height * 238 info->block_size / (info->block_width * info->block_height); 239 const uint64_t slice_count = 240 info->depth * info->array_size * info->sample_count; 241 const uint64_t estimated_size = slice_size * slice_count; 242 243 ILO_DEV_ASSERT(dev, 6, 8); 244 245 if (info->level_count == 1) 246 return estimated_size; 247 else 248 return estimated_size * 4 / 3; 249 } 250 251 static enum gen_surface_tiling 252 image_get_gen6_tiling(const struct ilo_dev *dev, 253 const struct ilo_image_info *info, 254 uint8_t valid_tilings) 255 { 256 ILO_DEV_ASSERT(dev, 6, 8); 257 258 switch (valid_tilings) { 259 case IMAGE_TILING_NONE: 260 return GEN6_TILING_NONE; 261 case IMAGE_TILING_X: 262 return GEN6_TILING_X; 263 case IMAGE_TILING_Y: 264 return GEN6_TILING_Y; 265 case IMAGE_TILING_W: 266 return GEN8_TILING_W; 267 default: 268 break; 269 } 270 271 /* 272 * X-tiling has the property that vertically adjacent pixels are usually in 273 * the same page. When the image size is less than a page, the image 274 * height is 1, or when the image is not accessed in blocks, there is no 275 * reason to tile. 276 * 277 * Y-tiling is similar, where vertically adjacent pixels are usually in the 278 * same cacheline. 279 */ 280 if (valid_tilings & IMAGE_TILING_NONE) { 281 const uint64_t estimated_size = 282 image_get_gen6_estimated_size(dev, info); 283 284 if (info->height == 1 || !(info->bind_surface_sampler || 285 info->bind_surface_dp_render || 286 info->bind_surface_dp_typed)) 287 return GEN6_TILING_NONE; 288 289 if (estimated_size <= 64 || (info->prefer_linear_threshold && 290 estimated_size > info->prefer_linear_threshold)) 291 return GEN6_TILING_NONE; 292 293 if (estimated_size <= 2048) 294 valid_tilings &= ~IMAGE_TILING_X; 295 } 296 297 return (valid_tilings & IMAGE_TILING_Y) ? GEN6_TILING_Y : 298 (valid_tilings & IMAGE_TILING_X) ? GEN6_TILING_X : 299 GEN6_TILING_NONE; 300 } 301 302 static bool 303 image_get_gen6_hiz_enable(const struct ilo_dev *dev, 304 const struct ilo_image_info *info) 305 { 306 ILO_DEV_ASSERT(dev, 6, 8); 307 308 /* depth buffer? */ 309 if (!info->bind_zs || 310 info->format == GEN6_FORMAT_R8_UINT || 311 info->interleaved_stencil) 312 return false; 313 314 /* we want to be able to force 8x4 alignments */ 315 if (info->type == GEN6_SURFTYPE_1D) 316 return false; 317 318 if (info->aux_disable) 319 return false; 320 321 if (ilo_debug & ILO_DEBUG_NOHIZ) 322 return false; 323 324 return true; 325 } 326 327 static bool 328 image_get_gen7_mcs_enable(const struct ilo_dev *dev, 329 const struct ilo_image_info *info, 330 enum gen_surface_tiling tiling) 331 { 332 ILO_DEV_ASSERT(dev, 7, 8); 333 334 if (!info->bind_surface_sampler && !info->bind_surface_dp_render) 335 return false; 336 337 /* 338 * From the Ivy Bridge PRM, volume 4 part 1, page 77: 339 * 340 * "For Render Target and Sampling Engine Surfaces:If the surface is 341 * multisampled (Number of Multisamples any value other than 342 * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled." 343 * 344 * "This field must be set to 0 for all SINT MSRTs when all RT channels 345 * are not written" 346 */ 347 if (info->sample_count > 1) { 348 if (ilo_dev_gen(dev) < ILO_GEN(8)) 349 assert(!info->is_integer); 350 return true; 351 } 352 353 if (info->aux_disable) 354 return false; 355 356 /* 357 * From the Ivy Bridge PRM, volume 2 part 1, page 326: 358 * 359 * "When MCS is buffer is used for color clear of non-multisampler 360 * render target, the following restrictions apply. 361 * - Support is limited to tiled render targets. 362 * - Support is for non-mip-mapped and non-array surface types only. 363 * - Clear is supported only on the full RT; i.e., no partial clear or 364 * overlapping clears. 365 * - MCS buffer for non-MSRT is supported only for RT formats 32bpp, 366 * 64bpp and 128bpp. 367 * ..." 368 * 369 * How about SURFTYPE_3D? 370 */ 371 if (!info->bind_surface_dp_render || 372 tiling == GEN6_TILING_NONE || 373 info->level_count > 1 || 374 info->array_size > 1) 375 return false; 376 377 switch (info->block_size) { 378 case 4: 379 case 8: 380 case 16: 381 return true; 382 default: 383 return false; 384 } 385 } 386 387 static void 388 image_get_gen6_alignments(const struct ilo_dev *dev, 389 const struct ilo_image_info *info, 390 int *align_i, int *align_j) 391 { 392 ILO_DEV_ASSERT(dev, 6, 6); 393 394 /* 395 * From the Sandy Bridge PRM, volume 1 part 1, page 113: 396 * 397 * "surface format align_i align_j 398 * YUV 4:2:2 formats 4 *see below 399 * BC1-5 4 4 400 * FXT1 8 4 401 * all other formats 4 *see below" 402 * 403 * "- align_j = 4 for any depth buffer 404 * - align_j = 2 for separate stencil buffer 405 * - align_j = 4 for any render target surface is multisampled (4x) 406 * - align_j = 4 for any render target surface with Surface Vertical 407 * Alignment = VALIGN_4 408 * - align_j = 2 for any render target surface with Surface Vertical 409 * Alignment = VALIGN_2 410 * - align_j = 2 for all other render target surface 411 * - align_j = 2 for any sampling engine surface with Surface Vertical 412 * Alignment = VALIGN_2 413 * - align_j = 4 for any sampling engine surface with Surface Vertical 414 * Alignment = VALIGN_4" 415 * 416 * From the Sandy Bridge PRM, volume 4 part 1, page 86: 417 * 418 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if 419 * the Surface Format is 96 bits per element (BPE)." 420 * 421 * They can be rephrased as 422 * 423 * align_i align_j 424 * compressed formats block width block height 425 * GEN6_FORMAT_R8_UINT 4 2 426 * other depth/stencil formats 4 4 427 * 4x multisampled 4 4 428 * bpp 96 4 2 429 * others 4 2 or 4 430 */ 431 432 *align_i = (info->compressed) ? info->block_width : 4; 433 if (info->compressed) { 434 *align_j = info->block_height; 435 } else if (info->bind_zs) { 436 *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4; 437 } else { 438 *align_j = (info->sample_count > 1 || info->block_size != 12) ? 4 : 2; 439 } 440 } 441 442 static void 443 image_get_gen7_alignments(const struct ilo_dev *dev, 444 const struct ilo_image_info *info, 445 enum gen_surface_tiling tiling, 446 int *align_i, int *align_j) 447 { 448 int i, j; 449 450 ILO_DEV_ASSERT(dev, 7, 8); 451 452 /* 453 * From the Ivy Bridge PRM, volume 1 part 1, page 110: 454 * 455 * "surface defined by surface format align_i align_j 456 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4 457 * not D16_UNORM 4 4 458 * 3DSTATE_STENCIL_BUFFER N/A 8 8 459 * SURFACE_STATE BC*, ETC*, EAC* 4 4 460 * FXT1 8 4 461 * all others (set by SURFACE_STATE)" 462 * 463 * From the Ivy Bridge PRM, volume 4 part 1, page 63: 464 * 465 * "- This field (Surface Vertical Aligment) is intended to be set to 466 * VALIGN_4 if the surface was rendered as a depth buffer, for a 467 * multisampled (4x) render target, or for a multisampled (8x) 468 * render target, since these surfaces support only alignment of 4. 469 * - Use of VALIGN_4 for other surfaces is supported, but uses more 470 * memory. 471 * - This field must be set to VALIGN_4 for all tiled Y Render Target 472 * surfaces. 473 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182), 474 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190) 475 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field 476 * must be set to VALIGN_4." 477 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT." 478 * 479 * "- This field (Surface Horizontal Aligment) is intended to be set to 480 * HALIGN_8 only if the surface was rendered as a depth buffer with 481 * Z16 format or a stencil buffer, since these surfaces support only 482 * alignment of 8. 483 * - Use of HALIGN_8 for other surfaces is supported, but uses more 484 * memory. 485 * - This field must be set to HALIGN_4 if the Surface Format is BC*. 486 * - This field must be set to HALIGN_8 if the Surface Format is 487 * FXT1." 488 * 489 * They can be rephrased as 490 * 491 * align_i align_j 492 * compressed formats block width block height 493 * GEN6_FORMAT_R16_UNORM 8 4 494 * GEN6_FORMAT_R8_UINT 8 8 495 * other depth/stencil formats 4 4 496 * 2x or 4x multisampled 4 or 8 4 497 * tiled Y 4 or 8 4 (if rt) 498 * GEN6_FORMAT_R32G32B32_FLOAT 4 or 8 2 499 * others 4 or 8 2 or 4 500 */ 501 if (info->compressed) { 502 i = info->block_width; 503 j = info->block_height; 504 } else if (info->bind_zs) { 505 switch (info->format) { 506 case GEN6_FORMAT_R16_UNORM: 507 i = 8; 508 j = 4; 509 break; 510 case GEN6_FORMAT_R8_UINT: 511 i = 8; 512 j = 8; 513 break; 514 default: 515 i = 4; 516 j = 4; 517 break; 518 } 519 } else { 520 const bool valign_4 = 521 (info->sample_count > 1 || ilo_dev_gen(dev) >= ILO_GEN(8) || 522 (tiling == GEN6_TILING_Y && info->bind_surface_dp_render)); 523 524 if (ilo_dev_gen(dev) < ILO_GEN(8) && valign_4) 525 assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT); 526 527 i = 4; 528 j = (valign_4) ? 4 : 2; 529 } 530 531 *align_i = i; 532 *align_j = j; 533 } 534 535 static bool 536 image_init_gen6_hardware_layout(const struct ilo_dev *dev, 537 const struct ilo_image_info *info, 538 struct ilo_image_layout *layout) 539 { 540 ILO_DEV_ASSERT(dev, 6, 8); 541 542 if (ilo_dev_gen(dev) >= ILO_GEN(7)) 543 layout->walk = image_get_gen7_walk(dev, info); 544 else 545 layout->walk = image_get_gen6_walk(dev, info); 546 547 layout->interleaved_samples = 548 image_get_gen6_interleaved_samples(dev, info); 549 550 layout->valid_tilings = image_get_gen6_valid_tilings(dev, info); 551 if (!layout->valid_tilings) 552 return false; 553 554 layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings); 555 556 if (image_get_gen6_hiz_enable(dev, info)) 557 layout->aux = ILO_IMAGE_AUX_HIZ; 558 else if (ilo_dev_gen(dev) >= ILO_GEN(7) && 559 image_get_gen7_mcs_enable(dev, info, layout->tiling)) 560 layout->aux = ILO_IMAGE_AUX_MCS; 561 else 562 layout->aux = ILO_IMAGE_AUX_NONE; 563 564 if (ilo_dev_gen(dev) >= ILO_GEN(7)) { 565 image_get_gen7_alignments(dev, info, layout->tiling, 566 &layout->align_i, &layout->align_j); 567 } else { 568 image_get_gen6_alignments(dev, info, 569 &layout->align_i, &layout->align_j); 570 } 571 572 return true; 573 } 574 575 static bool 576 image_init_gen6_transfer_layout(const struct ilo_dev *dev, 577 const struct ilo_image_info *info, 578 struct ilo_image_layout *layout) 579 { 580 ILO_DEV_ASSERT(dev, 6, 8); 581 582 /* we can define our own layout to save space */ 583 layout->walk = ILO_IMAGE_WALK_LOD; 584 layout->interleaved_samples = false; 585 layout->valid_tilings = IMAGE_TILING_NONE; 586 layout->tiling = GEN6_TILING_NONE; 587 layout->aux = ILO_IMAGE_AUX_NONE; 588 layout->align_i = info->block_width; 589 layout->align_j = info->block_height; 590 591 return true; 592 } 593 594 static void 595 image_get_gen6_slice_size(const struct ilo_dev *dev, 596 const struct ilo_image_info *info, 597 const struct ilo_image_layout *layout, 598 uint8_t level, 599 int *width, int *height) 600 { 601 int w, h; 602 603 ILO_DEV_ASSERT(dev, 6, 8); 604 605 w = u_minify(info->width, level); 606 h = u_minify(info->height, level); 607 608 /* 609 * From the Sandy Bridge PRM, volume 1 part 1, page 114: 610 * 611 * "The dimensions of the mip maps are first determined by applying the 612 * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then, 613 * if necessary, they are padded out to compression block boundaries." 614 */ 615 w = align(w, info->block_width); 616 h = align(h, info->block_height); 617 618 /* 619 * From the Sandy Bridge PRM, volume 1 part 1, page 111: 620 * 621 * "If the surface is multisampled (4x), these values must be adjusted 622 * as follows before proceeding: 623 * 624 * W_L = ceiling(W_L / 2) * 4 625 * H_L = ceiling(H_L / 2) * 4" 626 * 627 * From the Ivy Bridge PRM, volume 1 part 1, page 108: 628 * 629 * "If the surface is multisampled and it is a depth or stencil surface 630 * or Multisampled Surface StorageFormat in SURFACE_STATE is 631 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before 632 * proceeding: 633 * 634 * #samples W_L = H_L = 635 * 2 ceiling(W_L / 2) * 4 HL [no adjustment] 636 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4 637 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4 638 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8" 639 * 640 * For interleaved samples (4x), where pixels 641 * 642 * (x, y ) (x+1, y ) 643 * (x, y+1) (x+1, y+1) 644 * 645 * would be is occupied by 646 * 647 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1) 648 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1) 649 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3) 650 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3) 651 * 652 * Thus the need to 653 * 654 * w = align(w, 2) * 2; 655 * y = align(y, 2) * 2; 656 */ 657 if (layout->interleaved_samples) { 658 switch (info->sample_count) { 659 case 1: 660 break; 661 case 2: 662 w = align(w, 2) * 2; 663 break; 664 case 4: 665 w = align(w, 2) * 2; 666 h = align(h, 2) * 2; 667 break; 668 case 8: 669 w = align(w, 2) * 4; 670 h = align(h, 2) * 2; 671 break; 672 case 16: 673 w = align(w, 2) * 4; 674 h = align(h, 2) * 4; 675 break; 676 default: 677 assert(!"unsupported sample count"); 678 break; 679 } 680 } 681 682 /* 683 * From the Ivy Bridge PRM, volume 1 part 1, page 108: 684 * 685 * "For separate stencil buffer, the width must be mutiplied by 2 and 686 * height divided by 2..." 687 * 688 * To make things easier (for transfer), we will just double the stencil 689 * stride in 3DSTATE_STENCIL_BUFFER. 690 */ 691 w = align(w, layout->align_i); 692 h = align(h, layout->align_j); 693 694 *width = w; 695 *height = h; 696 } 697 698 static int 699 image_get_gen6_layer_count(const struct ilo_dev *dev, 700 const struct ilo_image_info *info, 701 const struct ilo_image_layout *layout) 702 { 703 int count = info->array_size; 704 705 ILO_DEV_ASSERT(dev, 6, 8); 706 707 /* samples of the same index are stored in a layer */ 708 if (!layout->interleaved_samples) 709 count *= info->sample_count; 710 711 return count; 712 } 713 714 static void 715 image_get_gen6_walk_layer_heights(const struct ilo_dev *dev, 716 const struct ilo_image_info *info, 717 struct ilo_image_layout *layout) 718 { 719 ILO_DEV_ASSERT(dev, 6, 8); 720 721 layout->walk_layer_h0 = layout->lods[0].slice_height; 722 723 if (info->level_count > 1) { 724 layout->walk_layer_h1 = layout->lods[1].slice_height; 725 } else { 726 int dummy; 727 image_get_gen6_slice_size(dev, info, layout, 1, 728 &dummy, &layout->walk_layer_h1); 729 } 730 731 if (image_get_gen6_layer_count(dev, info, layout) == 1) { 732 layout->walk_layer_height = 0; 733 return; 734 } 735 736 /* 737 * From the Sandy Bridge PRM, volume 1 part 1, page 115: 738 * 739 * "The following equation is used for surface formats other than 740 * compressed textures: 741 * 742 * QPitch = (h0 + h1 + 11j)" 743 * 744 * "The equation for compressed textures (BC* and FXT1 surface formats) 745 * follows: 746 * 747 * QPitch = (h0 + h1 + 11j) / 4" 748 * 749 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the 750 * value calculated in the equation above, for every other odd Surface 751 * Height starting from 1 i.e. 1,5,9,13" 752 * 753 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112: 754 * 755 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth 756 * buffer and stencil buffer have an implied value of ARYSPC_FULL): 757 * 758 * QPitch = (h0 + h1 + 12j) 759 * QPitch = (h0 + h1 + 12j) / 4 (compressed) 760 * 761 * (There are many typos or missing words here...)" 762 * 763 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to 764 * the base address. The PRM divides QPitch by 4 for compressed formats 765 * because the block height for those formats are 4, and it wants QPitch to 766 * mean the number of memory rows, as opposed to texel rows, between 767 * slices. Since we use texel rows everywhere, we do not need to divide 768 * QPitch by 4. 769 */ 770 layout->walk_layer_height = layout->walk_layer_h0 + layout->walk_layer_h1 + 771 ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j; 772 773 if (ilo_dev_gen(dev) == ILO_GEN(6) && info->sample_count > 1 && 774 info->height % 4 == 1) 775 layout->walk_layer_height += 4; 776 } 777 778 static void 779 image_get_gen6_monolithic_size(const struct ilo_dev *dev, 780 const struct ilo_image_info *info, 781 struct ilo_image_layout *layout, 782 int max_x, int max_y) 783 { 784 int align_w = 1, align_h = 1, pad_h = 0; 785 786 ILO_DEV_ASSERT(dev, 6, 8); 787 788 /* 789 * From the Sandy Bridge PRM, volume 1 part 1, page 118: 790 * 791 * "To determine the necessary padding on the bottom and right side of 792 * the surface, refer to the table in Section 7.18.3.4 for the i and j 793 * parameters for the surface format in use. The surface must then be 794 * extended to the next multiple of the alignment unit size in each 795 * dimension, and all texels contained in this extended surface must 796 * have valid GTT entries." 797 * 798 * "For cube surfaces, an additional two rows of padding are required 799 * at the bottom of the surface. This must be ensured regardless of 800 * whether the surface is stored tiled or linear. This is due to the 801 * potential rotation of cache line orientation from memory to cache." 802 * 803 * "For compressed textures (BC* and FXT1 surface formats), padding at 804 * the bottom of the surface is to an even compressed row, which is 805 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding 806 * purposes, these surfaces behave as if j = 8 only for surface 807 * padding purposes. The value of 4 for j still applies for mip level 808 * alignment and QPitch calculation." 809 */ 810 if (info->bind_surface_sampler) { 811 align_w = MAX2(align_w, layout->align_i); 812 align_h = MAX2(align_h, layout->align_j); 813 814 if (info->type == GEN6_SURFTYPE_CUBE) 815 pad_h += 2; 816 817 if (info->compressed) 818 align_h = MAX2(align_h, layout->align_j * 2); 819 } 820 821 /* 822 * From the Sandy Bridge PRM, volume 1 part 1, page 118: 823 * 824 * "If the surface contains an odd number of rows of data, a final row 825 * below the surface must be allocated." 826 */ 827 if (info->bind_surface_dp_render) 828 align_h = MAX2(align_h, 2); 829 830 /* 831 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ 832 * for unaligned non-mipmapped and non-array images. 833 */ 834 if (layout->aux == ILO_IMAGE_AUX_HIZ && 835 info->level_count == 1 && info->array_size == 1 && info->depth == 1) { 836 align_w = MAX2(align_w, 8); 837 align_h = MAX2(align_h, 4); 838 } 839 840 layout->monolithic_width = align(max_x, align_w); 841 layout->monolithic_height = align(max_y + pad_h, align_h); 842 } 843 844 static void 845 image_get_gen6_lods(const struct ilo_dev *dev, 846 const struct ilo_image_info *info, 847 struct ilo_image_layout *layout) 848 { 849 const int layer_count = image_get_gen6_layer_count(dev, info, layout); 850 int cur_x, cur_y, max_x, max_y; 851 uint8_t lv; 852 853 ILO_DEV_ASSERT(dev, 6, 8); 854 855 cur_x = 0; 856 cur_y = 0; 857 max_x = 0; 858 max_y = 0; 859 for (lv = 0; lv < info->level_count; lv++) { 860 int slice_w, slice_h, lod_w, lod_h; 861 862 image_get_gen6_slice_size(dev, info, layout, lv, &slice_w, &slice_h); 863 864 layout->lods[lv].x = cur_x; 865 layout->lods[lv].y = cur_y; 866 layout->lods[lv].slice_width = slice_w; 867 layout->lods[lv].slice_height = slice_h; 868 869 switch (layout->walk) { 870 case ILO_IMAGE_WALK_LAYER: 871 lod_w = slice_w; 872 lod_h = slice_h; 873 874 /* MIPLAYOUT_BELOW */ 875 if (lv == 1) 876 cur_x += lod_w; 877 else 878 cur_y += lod_h; 879 break; 880 case ILO_IMAGE_WALK_LOD: 881 lod_w = slice_w; 882 lod_h = slice_h * layer_count; 883 884 if (lv == 1) 885 cur_x += lod_w; 886 else 887 cur_y += lod_h; 888 889 /* every LOD begins at tile boundaries */ 890 if (info->level_count > 1) { 891 assert(info->format == GEN6_FORMAT_R8_UINT); 892 cur_x = align(cur_x, 64); 893 cur_y = align(cur_y, 64); 894 } 895 break; 896 case ILO_IMAGE_WALK_3D: 897 { 898 const int slice_count = u_minify(info->depth, lv); 899 const int slice_count_per_row = 1 << lv; 900 const int row_count = 901 (slice_count + slice_count_per_row - 1) / slice_count_per_row; 902 903 lod_w = slice_w * slice_count_per_row; 904 lod_h = slice_h * row_count; 905 } 906 907 cur_y += lod_h; 908 break; 909 default: 910 assert(!"unknown walk type"); 911 lod_w = 0; 912 lod_h = 0; 913 break; 914 } 915 916 if (max_x < layout->lods[lv].x + lod_w) 917 max_x = layout->lods[lv].x + lod_w; 918 if (max_y < layout->lods[lv].y + lod_h) 919 max_y = layout->lods[lv].y + lod_h; 920 } 921 922 if (layout->walk == ILO_IMAGE_WALK_LAYER) { 923 image_get_gen6_walk_layer_heights(dev, info, layout); 924 if (layer_count > 1) 925 max_y += layout->walk_layer_height * (layer_count - 1); 926 } else { 927 layout->walk_layer_h0 = 0; 928 layout->walk_layer_h1 = 0; 929 layout->walk_layer_height = 0; 930 } 931 932 image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y); 933 } 934 935 static bool 936 image_bind_gpu(const struct ilo_image_info *info) 937 { 938 return (info->bind_surface_sampler || 939 info->bind_surface_dp_render || 940 info->bind_surface_dp_typed || 941 info->bind_zs || 942 info->bind_scanout || 943 info->bind_cursor); 944 } 945 946 static bool 947 image_validate_gen6(const struct ilo_dev *dev, 948 const struct ilo_image_info *info) 949 { 950 ILO_DEV_ASSERT(dev, 6, 8); 951 952 /* 953 * From the Ivy Bridge PRM, volume 2 part 1, page 314: 954 * 955 * "The separate stencil buffer is always enabled, thus the field in 956 * 3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil 957 * buffer has been removed Surface formats with interleaved depth and 958 * stencil are no longer supported" 959 */ 960 if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->bind_zs) 961 assert(!info->interleaved_stencil); 962 963 return true; 964 } 965 966 static bool 967 image_get_gen6_layout(const struct ilo_dev *dev, 968 const struct ilo_image_info *info, 969 struct ilo_image_layout *layout) 970 { 971 ILO_DEV_ASSERT(dev, 6, 8); 972 973 if (!image_validate_gen6(dev, info)) 974 return false; 975 976 if (image_bind_gpu(info) || info->level_count > 1) { 977 if (!image_init_gen6_hardware_layout(dev, info, layout)) 978 return false; 979 } else { 980 if (!image_init_gen6_transfer_layout(dev, info, layout)) 981 return false; 982 } 983 984 /* 985 * the fact that align i and j are multiples of block width and height 986 * respectively is what makes the size of the bo a multiple of the block 987 * size, slices start at block boundaries, and many of the computations 988 * work. 989 */ 990 assert(layout->align_i % info->block_width == 0); 991 assert(layout->align_j % info->block_height == 0); 992 993 /* make sure align() works */ 994 assert(util_is_power_of_two(layout->align_i) && 995 util_is_power_of_two(layout->align_j)); 996 assert(util_is_power_of_two(info->block_width) && 997 util_is_power_of_two(info->block_height)); 998 999 image_get_gen6_lods(dev, info, layout); 1000 1001 assert(layout->walk_layer_height % info->block_height == 0); 1002 assert(layout->monolithic_width % info->block_width == 0); 1003 assert(layout->monolithic_height % info->block_height == 0); 1004 1005 return true; 1006 } 1007 1008 static bool 1009 image_set_gen6_bo_size(struct ilo_image *img, 1010 const struct ilo_dev *dev, 1011 const struct ilo_image_info *info, 1012 const struct ilo_image_layout *layout) 1013 { 1014 int stride, height; 1015 int align_w, align_h; 1016 1017 ILO_DEV_ASSERT(dev, 6, 8); 1018 1019 stride = (layout->monolithic_width / info->block_width) * info->block_size; 1020 height = layout->monolithic_height / info->block_height; 1021 1022 /* 1023 * From the Haswell PRM, volume 5, page 163: 1024 * 1025 * "For linear surfaces, additional padding of 64 bytes is required 1026 * at the bottom of the surface. This is in addition to the padding 1027 * required above." 1028 */ 1029 if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler && 1030 layout->tiling == GEN6_TILING_NONE) 1031 height += (64 + stride - 1) / stride; 1032 1033 /* 1034 * From the Sandy Bridge PRM, volume 4 part 1, page 81: 1035 * 1036 * "- For linear render target surfaces, the pitch must be a multiple 1037 * of the element size for non-YUV surface formats. Pitch must be a 1038 * multiple of 2 * element size for YUV surface formats. 1039 * 1040 * - For other linear surfaces, the pitch can be any multiple of 1041 * bytes. 1042 * - For tiled surfaces, the pitch must be a multiple of the tile 1043 * width." 1044 * 1045 * Different requirements may exist when the image is used in different 1046 * places, but our alignments here should be good enough that we do not 1047 * need to check info->bind_x. 1048 */ 1049 switch (layout->tiling) { 1050 case GEN6_TILING_X: 1051 align_w = 512; 1052 align_h = 8; 1053 break; 1054 case GEN6_TILING_Y: 1055 align_w = 128; 1056 align_h = 32; 1057 break; 1058 case GEN8_TILING_W: 1059 /* 1060 * From the Sandy Bridge PRM, volume 1 part 2, page 22: 1061 * 1062 * "A 4KB tile is subdivided into 8-high by 8-wide array of 1063 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8 1064 * bytes." 1065 */ 1066 align_w = 64; 1067 align_h = 64; 1068 break; 1069 default: 1070 assert(layout->tiling == GEN6_TILING_NONE); 1071 /* some good enough values */ 1072 align_w = 64; 1073 align_h = 2; 1074 break; 1075 } 1076 1077 if (info->force_bo_stride) { 1078 if (info->force_bo_stride % align_w || info->force_bo_stride < stride) 1079 return false; 1080 1081 img->bo_stride = info->force_bo_stride; 1082 } else { 1083 img->bo_stride = align(stride, align_w); 1084 } 1085 1086 img->bo_height = align(height, align_h); 1087 1088 return true; 1089 } 1090 1091 static bool 1092 image_set_gen6_hiz(struct ilo_image *img, 1093 const struct ilo_dev *dev, 1094 const struct ilo_image_info *info, 1095 const struct ilo_image_layout *layout) 1096 { 1097 const int hz_align_j = 8; 1098 enum ilo_image_walk_type hz_walk; 1099 int hz_width, hz_height; 1100 int hz_clear_w, hz_clear_h; 1101 uint8_t lv; 1102 1103 ILO_DEV_ASSERT(dev, 6, 8); 1104 1105 assert(layout->aux == ILO_IMAGE_AUX_HIZ); 1106 1107 assert(layout->walk == ILO_IMAGE_WALK_LAYER || 1108 layout->walk == ILO_IMAGE_WALK_3D); 1109 1110 /* 1111 * From the Sandy Bridge PRM, volume 2 part 1, page 312: 1112 * 1113 * "The hierarchical depth buffer does not support the LOD field, it is 1114 * assumed by hardware to be zero. A separate hierarachical depth 1115 * buffer is required for each LOD used, and the corresponding 1116 * buffer's state delivered to hardware each time a new depth buffer 1117 * state with modified LOD is delivered." 1118 * 1119 * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD. 1120 */ 1121 if (ilo_dev_gen(dev) >= ILO_GEN(7)) 1122 hz_walk = layout->walk; 1123 else 1124 hz_walk = ILO_IMAGE_WALK_LOD; 1125 1126 /* 1127 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge 1128 * PRM, volume 2 part 1, page 312-313. 1129 * 1130 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a 1131 * memory row. 1132 */ 1133 switch (hz_walk) { 1134 case ILO_IMAGE_WALK_LAYER: 1135 { 1136 const int h0 = align(layout->walk_layer_h0, hz_align_j); 1137 const int h1 = align(layout->walk_layer_h1, hz_align_j); 1138 const int htail = 1139 ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j; 1140 const int hz_qpitch = h0 + h1 + htail; 1141 1142 hz_width = align(layout->lods[0].slice_width, 16); 1143 1144 hz_height = hz_qpitch * info->array_size / 2; 1145 if (ilo_dev_gen(dev) >= ILO_GEN(7)) 1146 hz_height = align(hz_height, 8); 1147 1148 img->aux.walk_layer_height = hz_qpitch; 1149 } 1150 break; 1151 case ILO_IMAGE_WALK_LOD: 1152 { 1153 int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT]; 1154 int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT]; 1155 int cur_tx, cur_ty; 1156 1157 /* figure out the tile offsets of LODs */ 1158 hz_width = 0; 1159 hz_height = 0; 1160 cur_tx = 0; 1161 cur_ty = 0; 1162 for (lv = 0; lv < info->level_count; lv++) { 1163 int tw, th; 1164 1165 lod_tx[lv] = cur_tx; 1166 lod_ty[lv] = cur_ty; 1167 1168 tw = align(layout->lods[lv].slice_width, 16); 1169 th = align(layout->lods[lv].slice_height, hz_align_j) * 1170 info->array_size / 2; 1171 /* convert to Y-tiles */ 1172 tw = (tw + 127) / 128; 1173 th = (th + 31) / 32; 1174 1175 if (hz_width < cur_tx + tw) 1176 hz_width = cur_tx + tw; 1177 if (hz_height < cur_ty + th) 1178 hz_height = cur_ty + th; 1179 1180 if (lv == 1) 1181 cur_tx += tw; 1182 else 1183 cur_ty += th; 1184 } 1185 1186 /* convert tile offsets to memory offsets */ 1187 for (lv = 0; lv < info->level_count; lv++) { 1188 img->aux.walk_lod_offsets[lv] = 1189 (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096; 1190 } 1191 1192 hz_width *= 128; 1193 hz_height *= 32; 1194 } 1195 break; 1196 case ILO_IMAGE_WALK_3D: 1197 hz_width = align(layout->lods[0].slice_width, 16); 1198 1199 hz_height = 0; 1200 for (lv = 0; lv < info->level_count; lv++) { 1201 const int h = align(layout->lods[lv].slice_height, hz_align_j); 1202 /* according to the formula, slices are packed together vertically */ 1203 hz_height += h * u_minify(info->depth, lv); 1204 } 1205 hz_height /= 2; 1206 break; 1207 default: 1208 assert(!"unknown HiZ walk"); 1209 hz_width = 0; 1210 hz_height = 0; 1211 break; 1212 } 1213 1214 /* 1215 * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks. 1216 * Experiments on Haswell show that aligning the RECTLIST primitive and 1217 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be 1218 * aligned. 1219 */ 1220 hz_clear_w = 8; 1221 hz_clear_h = 4; 1222 switch (info->sample_count) { 1223 case 1: 1224 default: 1225 break; 1226 case 2: 1227 hz_clear_w /= 2; 1228 break; 1229 case 4: 1230 hz_clear_w /= 2; 1231 hz_clear_h /= 2; 1232 break; 1233 case 8: 1234 hz_clear_w /= 4; 1235 hz_clear_h /= 2; 1236 break; 1237 case 16: 1238 hz_clear_w /= 4; 1239 hz_clear_h /= 4; 1240 break; 1241 } 1242 1243 for (lv = 0; lv < info->level_count; lv++) { 1244 if (u_minify(info->width, lv) % hz_clear_w || 1245 u_minify(info->height, lv) % hz_clear_h) 1246 break; 1247 img->aux.enables |= 1 << lv; 1248 } 1249 1250 /* we padded to allow this in image_get_gen6_monolithic_size() */ 1251 if (info->level_count == 1 && info->array_size == 1 && info->depth == 1) 1252 img->aux.enables |= 0x1; 1253 1254 /* align to Y-tile */ 1255 img->aux.bo_stride = align(hz_width, 128); 1256 img->aux.bo_height = align(hz_height, 32); 1257 1258 return true; 1259 } 1260 1261 static bool 1262 image_set_gen7_mcs(struct ilo_image *img, 1263 const struct ilo_dev *dev, 1264 const struct ilo_image_info *info, 1265 const struct ilo_image_layout *layout) 1266 { 1267 int mcs_width, mcs_height, mcs_cpp; 1268 int downscale_x, downscale_y; 1269 1270 ILO_DEV_ASSERT(dev, 7, 8); 1271 1272 assert(layout->aux == ILO_IMAGE_AUX_MCS); 1273 1274 if (info->sample_count > 1) { 1275 /* 1276 * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear 1277 * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The 1278 * need of scale down could be that the clear rectangle is used to clear 1279 * the MCS instead of the RT. 1280 * 1281 * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The 1282 * 2x2 factor could come from that the hardware writes 128 bits (an 1283 * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in 1284 * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the 1285 * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2 1286 * pixel block in the RT. 1287 */ 1288 switch (info->sample_count) { 1289 case 2: 1290 case 4: 1291 downscale_x = 8; 1292 downscale_y = 2; 1293 mcs_cpp = 1; 1294 break; 1295 case 8: 1296 downscale_x = 2; 1297 downscale_y = 2; 1298 mcs_cpp = 4; 1299 break; 1300 case 16: 1301 downscale_x = 2; 1302 downscale_y = 1; 1303 mcs_cpp = 8; 1304 break; 1305 default: 1306 assert(!"unsupported sample count"); 1307 return false; 1308 break; 1309 } 1310 1311 /* 1312 * It also appears that the 2x2 subspans generated by the scaled-down 1313 * clear rectangle cannot be masked. The scale-down clear rectangle 1314 * thus must be aligned to 2x2, and we need to pad. 1315 */ 1316 mcs_width = align(info->width, downscale_x * 2); 1317 mcs_height = align(info->height, downscale_y * 2); 1318 } else { 1319 /* 1320 * From the Ivy Bridge PRM, volume 2 part 1, page 327: 1321 * 1322 * " Pixels Lines 1323 * TiledY RT CL 1324 * bpp 1325 * 32 8 4 1326 * 64 4 4 1327 * 128 2 4 1328 * 1329 * TiledX RT CL 1330 * bpp 1331 * 32 16 2 1332 * 64 8 2 1333 * 128 4 2" 1334 * 1335 * This table and the two following tables define the RT alignments, the 1336 * clear rectangle alignments, and the clear rectangle scale factors. 1337 * Viewing the RT alignments as the sizes of 128-byte blocks, we can see 1338 * that the clear rectangle alignments are 16x32 blocks, and the clear 1339 * rectangle scale factors are 8x16 blocks. 1340 * 1341 * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the 1342 * RT. Similar to the MSAA cases, we can argue that an OWord maps to 1343 * 8x16 blocks. 1344 * 1345 * One problem with this reasoning is that a Y-tile in MCS has 8x32 1346 * OWords and maps to 64x512 128-byte blocks. This differs from i965, 1347 * which says that a Y-tile maps to 128x256 blocks (\see 1348 * intel_get_non_msrt_mcs_alignment). It does not really change 1349 * anything except for the size of the allocated MCS. Let's see if we 1350 * hit out-of-bound access. 1351 */ 1352 switch (layout->tiling) { 1353 case GEN6_TILING_X: 1354 downscale_x = 64 / info->block_size; 1355 downscale_y = 2; 1356 break; 1357 case GEN6_TILING_Y: 1358 downscale_x = 32 / info->block_size; 1359 downscale_y = 4; 1360 break; 1361 default: 1362 assert(!"unsupported tiling mode"); 1363 return false; 1364 break; 1365 } 1366 1367 downscale_x *= 8; 1368 downscale_y *= 16; 1369 1370 /* 1371 * From the Haswell PRM, volume 7, page 652: 1372 * 1373 * "Clear rectangle must be aligned to two times the number of 1374 * pixels in the table shown below due to 16X16 hashing across the 1375 * slice." 1376 * 1377 * The scaled-down clear rectangle must be aligned to 4x4 instead of 1378 * 2x2, and we need to pad. 1379 */ 1380 mcs_width = align(info->width, downscale_x * 4) / downscale_x; 1381 mcs_height = align(info->height, downscale_y * 4) / downscale_y; 1382 mcs_cpp = 16; /* an OWord */ 1383 } 1384 1385 img->aux.enables = (1 << info->level_count) - 1; 1386 /* align to Y-tile */ 1387 img->aux.bo_stride = align(mcs_width * mcs_cpp, 128); 1388 img->aux.bo_height = align(mcs_height, 32); 1389 1390 return true; 1391 } 1392 1393 bool 1394 ilo_image_init(struct ilo_image *img, 1395 const struct ilo_dev *dev, 1396 const struct ilo_image_info *info) 1397 { 1398 struct ilo_image_layout layout; 1399 1400 assert(ilo_is_zeroed(img, sizeof(*img))); 1401 1402 memset(&layout, 0, sizeof(layout)); 1403 layout.lods = img->lods; 1404 1405 if (!image_get_gen6_layout(dev, info, &layout)) 1406 return false; 1407 1408 img->type = info->type; 1409 1410 img->format = info->format; 1411 img->block_width = info->block_width; 1412 img->block_height = info->block_height; 1413 img->block_size = info->block_size; 1414 1415 img->width0 = info->width; 1416 img->height0 = info->height; 1417 img->depth0 = info->depth; 1418 img->array_size = info->array_size; 1419 img->level_count = info->level_count; 1420 img->sample_count = info->sample_count; 1421 1422 img->walk = layout.walk; 1423 img->interleaved_samples = layout.interleaved_samples; 1424 1425 img->tiling = layout.tiling; 1426 1427 img->aux.type = layout.aux; 1428 1429 img->align_i = layout.align_i; 1430 img->align_j = layout.align_j; 1431 1432 img->walk_layer_height = layout.walk_layer_height; 1433 1434 if (!image_set_gen6_bo_size(img, dev, info, &layout)) 1435 return false; 1436 1437 img->scanout = info->bind_scanout; 1438 1439 switch (layout.aux) { 1440 case ILO_IMAGE_AUX_HIZ: 1441 image_set_gen6_hiz(img, dev, info, &layout); 1442 break; 1443 case ILO_IMAGE_AUX_MCS: 1444 image_set_gen7_mcs(img, dev, info, &layout); 1445 break; 1446 default: 1447 break; 1448 } 1449 1450 return true; 1451 } 1452