Home | History | Annotate | Download | only in isl
      1 /*
      2  * Copyright 2015 Intel Corporation
      3  *
      4  *  Permission is hereby granted, free of charge, to any person obtaining a
      5  *  copy of this software and associated documentation files (the "Software"),
      6  *  to deal in the Software without restriction, including without limitation
      7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  *  and/or sell copies of the Software, and to permit persons to whom the
      9  *  Software is furnished to do so, subject to the following conditions:
     10  *
     11  *  The above copyright notice and this permission notice (including the next
     12  *  paragraph) shall be included in all copies or substantial portions of the
     13  *  Software.
     14  *
     15  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  *  IN THE SOFTWARE.
     22  */
     23 
     24 #include <assert.h>
     25 #include <stdarg.h>
     26 #include <stdio.h>
     27 
     28 #include "isl.h"
     29 #include "isl_gen4.h"
     30 #include "isl_gen6.h"
     31 #include "isl_gen7.h"
     32 #include "isl_gen8.h"
     33 #include "isl_gen9.h"
     34 #include "isl_priv.h"
     35 
     36 void PRINTFLIKE(3, 4) UNUSED
     37 __isl_finishme(const char *file, int line, const char *fmt, ...)
     38 {
     39    va_list ap;
     40    char buf[512];
     41 
     42    va_start(ap, fmt);
     43    vsnprintf(buf, sizeof(buf), fmt, ap);
     44    va_end(ap);
     45 
     46    fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
     47 }
     48 
     49 static const struct {
     50    uint8_t size;
     51    uint8_t align;
     52    uint8_t addr_offset;
     53    uint8_t aux_addr_offset;
     54 } ss_infos[] = {
     55    [4] = {24, 32,  4},
     56    [5] = {24, 32,  4},
     57    [6] = {24, 32,  4},
     58    [7] = {32, 32,  4, 24},
     59    [8] = {64, 64, 32, 40},
     60    [9] = {64, 64, 32, 40},
     61 };
     62 
     63 void
     64 isl_device_init(struct isl_device *dev,
     65                 const struct gen_device_info *info,
     66                 bool has_bit6_swizzling)
     67 {
     68    dev->info = info;
     69    dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6;
     70    dev->has_bit6_swizzling = has_bit6_swizzling;
     71 
     72    /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
     73     * device properties at buildtime. Verify that the macros with the device
     74     * properties chosen during runtime.
     75     */
     76    ISL_DEV_GEN_SANITIZE(dev);
     77    ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
     78 
     79    /* Did we break hiz or stencil? */
     80    if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
     81       assert(info->has_hiz_and_separate_stencil);
     82    if (info->must_use_separate_stencil)
     83       assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
     84 
     85    dev->ss.size = ss_infos[ISL_DEV_GEN(dev)].size;
     86    dev->ss.align = ss_infos[ISL_DEV_GEN(dev)].align;
     87    dev->ss.addr_offset = ss_infos[ISL_DEV_GEN(dev)].addr_offset;
     88    dev->ss.aux_addr_offset = ss_infos[ISL_DEV_GEN(dev)].aux_addr_offset;
     89 }
     90 
     91 /**
     92  * @brief Query the set of multisamples supported by the device.
     93  *
     94  * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
     95  * supported.
     96  */
     97 isl_sample_count_mask_t ATTRIBUTE_CONST
     98 isl_device_get_sample_counts(struct isl_device *dev)
     99 {
    100    if (ISL_DEV_GEN(dev) >= 9) {
    101       return ISL_SAMPLE_COUNT_1_BIT |
    102              ISL_SAMPLE_COUNT_2_BIT |
    103              ISL_SAMPLE_COUNT_4_BIT |
    104              ISL_SAMPLE_COUNT_8_BIT |
    105              ISL_SAMPLE_COUNT_16_BIT;
    106    } else if (ISL_DEV_GEN(dev) >= 8) {
    107       return ISL_SAMPLE_COUNT_1_BIT |
    108              ISL_SAMPLE_COUNT_2_BIT |
    109              ISL_SAMPLE_COUNT_4_BIT |
    110              ISL_SAMPLE_COUNT_8_BIT;
    111    } else if (ISL_DEV_GEN(dev) >= 7) {
    112       return ISL_SAMPLE_COUNT_1_BIT |
    113              ISL_SAMPLE_COUNT_4_BIT |
    114              ISL_SAMPLE_COUNT_8_BIT;
    115    } else if (ISL_DEV_GEN(dev) >= 6) {
    116       return ISL_SAMPLE_COUNT_1_BIT |
    117              ISL_SAMPLE_COUNT_4_BIT;
    118    } else {
    119       return ISL_SAMPLE_COUNT_1_BIT;
    120    }
    121 }
    122 
    123 /**
    124  * @param[out] info is written only on success
    125  */
    126 static bool
    127 isl_tiling_get_info(const struct isl_device *dev,
    128                     enum isl_tiling tiling,
    129                     uint32_t format_bpb,
    130                     struct isl_tile_info *tile_info)
    131 {
    132    const uint32_t bs = format_bpb / 8;
    133    struct isl_extent2d logical_el, phys_B;
    134 
    135    if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
    136       /* It is possible to have non-power-of-two formats in a tiled buffer.
    137        * The easiest way to handle this is to treat the tile as if it is three
    138        * times as wide.  This way no pixel will ever cross a tile boundary.
    139        * This really only works on legacy X and Y tiling formats.
    140        */
    141       assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
    142       assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
    143       return isl_tiling_get_info(dev, tiling, format_bpb / 3, tile_info);
    144    }
    145 
    146    switch (tiling) {
    147    case ISL_TILING_LINEAR:
    148       assert(bs > 0);
    149       logical_el = isl_extent2d(1, 1);
    150       phys_B = isl_extent2d(bs, 1);
    151       break;
    152 
    153    case ISL_TILING_X:
    154       assert(bs > 0);
    155       logical_el = isl_extent2d(512 / bs, 8);
    156       phys_B = isl_extent2d(512, 8);
    157       break;
    158 
    159    case ISL_TILING_Y0:
    160       assert(bs > 0);
    161       logical_el = isl_extent2d(128 / bs, 32);
    162       phys_B = isl_extent2d(128, 32);
    163       break;
    164 
    165    case ISL_TILING_W:
    166       assert(bs == 1);
    167       logical_el = isl_extent2d(64, 64);
    168       /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
    169        *
    170        *    "If the surface is a stencil buffer (and thus has Tile Mode set
    171        *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
    172        *    computed based on width, as the stencil buffer is stored with two
    173        *    rows interleaved."
    174        *
    175        * This, together with the fact that stencil buffers are referred to as
    176        * being Y-tiled in the PRMs for older hardware implies that the
    177        * physical size of a W-tile is actually the same as for a Y-tile.
    178        */
    179       phys_B = isl_extent2d(128, 32);
    180       break;
    181 
    182    case ISL_TILING_Yf:
    183    case ISL_TILING_Ys: {
    184       if (ISL_DEV_GEN(dev) < 9)
    185          return false;
    186 
    187       if (!isl_is_pow2(bs))
    188          return false;
    189 
    190       bool is_Ys = tiling == ISL_TILING_Ys;
    191 
    192       assert(bs > 0);
    193       unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
    194       unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
    195 
    196       logical_el = isl_extent2d(width / bs, height);
    197       phys_B = isl_extent2d(width, height);
    198       break;
    199    }
    200 
    201    case ISL_TILING_HIZ:
    202       /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
    203        * 128bpb format.  The tiling has the same physical dimensions as
    204        * Y-tiling but actually has two HiZ columns per Y-tiled column.
    205        */
    206       assert(bs == 16);
    207       logical_el = isl_extent2d(16, 16);
    208       phys_B = isl_extent2d(128, 32);
    209       break;
    210 
    211    case ISL_TILING_CCS:
    212       /* CCS surfaces are required to have one of the GENX_CCS_* formats which
    213        * have a block size of 1 or 2 bits per block and each CCS element
    214        * corresponds to one cache-line pair in the main surface.  From the Sky
    215        * Lake PRM Vol. 12 in the section on planes:
    216        *
    217        *    "The Color Control Surface (CCS) contains the compression status
    218        *    of the cache-line pairs. The compression state of the cache-line
    219        *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
    220        *    represents an area on the main surface of 16x16 sets of 128 byte
    221        *    Y-tiled cache-line-pairs. CCS is always Y tiled."
    222        *
    223        * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
    224        * Since each cache line corresponds to a 16x16 set of cache-line pairs,
    225        * that yields total tile area of 128x128 cache-line pairs or CCS
    226        * elements.  On older hardware, each CCS element is 1 bit and the tile
    227        * is 128x256 elements.
    228        */
    229       assert(format_bpb == 1 || format_bpb == 2);
    230       logical_el = isl_extent2d(128, 256 / format_bpb);
    231       phys_B = isl_extent2d(128, 32);
    232       break;
    233 
    234    default:
    235       unreachable("not reached");
    236    } /* end switch */
    237 
    238    *tile_info = (struct isl_tile_info) {
    239       .tiling = tiling,
    240       .format_bpb = format_bpb,
    241       .logical_extent_el = logical_el,
    242       .phys_extent_B = phys_B,
    243    };
    244 
    245    return true;
    246 }
    247 
    248 /**
    249  * @param[out] tiling is set only on success
    250  */
    251 static bool
    252 isl_surf_choose_tiling(const struct isl_device *dev,
    253                        const struct isl_surf_init_info *restrict info,
    254                        enum isl_tiling *tiling)
    255 {
    256    isl_tiling_flags_t tiling_flags = info->tiling_flags;
    257 
    258    /* HiZ surfaces always use the HiZ tiling */
    259    if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
    260       assert(info->format == ISL_FORMAT_HIZ);
    261       assert(tiling_flags == ISL_TILING_HIZ_BIT);
    262       *tiling = ISL_TILING_HIZ;
    263       return true;
    264    }
    265 
    266    /* CCS surfaces always use the CCS tiling */
    267    if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
    268       assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
    269       assert(tiling_flags == ISL_TILING_CCS_BIT);
    270       *tiling = ISL_TILING_CCS;
    271       return true;
    272    }
    273 
    274    if (ISL_DEV_GEN(dev) >= 6) {
    275       isl_gen6_filter_tiling(dev, info, &tiling_flags);
    276    } else {
    277       isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev));
    278       isl_gen6_filter_tiling(dev, info, &tiling_flags);
    279    }
    280 
    281    #define CHOOSE(__tiling) \
    282       do { \
    283          if (tiling_flags & (1u << (__tiling))) { \
    284             *tiling = (__tiling); \
    285             return true; \
    286           } \
    287       } while (0)
    288 
    289    /* Of the tiling modes remaining, choose the one that offers the best
    290     * performance.
    291     */
    292 
    293    if (info->dim == ISL_SURF_DIM_1D) {
    294       /* Prefer linear for 1D surfaces because they do not benefit from
    295        * tiling. To the contrary, tiling leads to wasted memory and poor
    296        * memory locality due to the swizzling and alignment restrictions
    297        * required in tiled surfaces.
    298        */
    299       CHOOSE(ISL_TILING_LINEAR);
    300    }
    301 
    302    CHOOSE(ISL_TILING_Ys);
    303    CHOOSE(ISL_TILING_Yf);
    304    CHOOSE(ISL_TILING_Y0);
    305    CHOOSE(ISL_TILING_X);
    306    CHOOSE(ISL_TILING_W);
    307    CHOOSE(ISL_TILING_LINEAR);
    308 
    309    #undef CHOOSE
    310 
    311    /* No tiling mode accomodates the inputs. */
    312    return false;
    313 }
    314 
    315 static bool
    316 isl_choose_msaa_layout(const struct isl_device *dev,
    317                  const struct isl_surf_init_info *info,
    318                  enum isl_tiling tiling,
    319                  enum isl_msaa_layout *msaa_layout)
    320 {
    321    if (ISL_DEV_GEN(dev) >= 8) {
    322       return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
    323    } else if (ISL_DEV_GEN(dev) >= 7) {
    324       return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
    325    } else if (ISL_DEV_GEN(dev) >= 6) {
    326       return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
    327    } else {
    328       return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
    329    }
    330 }
    331 
    332 struct isl_extent2d
    333 isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
    334 {
    335    assert(isl_is_pow2(samples));
    336 
    337    /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
    338     * Sizes (p133):
    339     *
    340     *    If the surface is multisampled and it is a depth or stencil surface
    341     *    or Multisampled Surface StorageFormat in SURFACE_STATE is
    342     *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
    343     *    proceeding: [...]
    344     */
    345    return (struct isl_extent2d) {
    346       .width = 1 << ((ffs(samples) - 0) / 2),
    347       .height = 1 << ((ffs(samples) - 1) / 2),
    348    };
    349 }
    350 
    351 static void
    352 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
    353                                     uint32_t *width, uint32_t *height)
    354 {
    355    const struct isl_extent2d px_size_sa =
    356       isl_get_interleaved_msaa_px_size_sa(samples);
    357 
    358    if (width)
    359       *width = isl_align(*width, 2) * px_size_sa.width;
    360    if (height)
    361       *height = isl_align(*height, 2) * px_size_sa.height;
    362 }
    363 
    364 static enum isl_array_pitch_span
    365 isl_choose_array_pitch_span(const struct isl_device *dev,
    366                             const struct isl_surf_init_info *restrict info,
    367                             enum isl_dim_layout dim_layout,
    368                             const struct isl_extent4d *phys_level0_sa)
    369 {
    370    switch (dim_layout) {
    371    case ISL_DIM_LAYOUT_GEN9_1D:
    372    case ISL_DIM_LAYOUT_GEN4_2D:
    373       if (ISL_DEV_GEN(dev) >= 8) {
    374          /* QPitch becomes programmable in Broadwell. So choose the
    375           * most compact QPitch possible in order to conserve memory.
    376           *
    377           * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
    378           * >> RENDER_SURFACE_STATE Surface QPitch (p325):
    379           *
    380           *    - Software must ensure that this field is set to a value
    381           *      sufficiently large such that the array slices in the surface
    382           *      do not overlap. Refer to the Memory Data Formats section for
    383           *      information on how surfaces are stored in memory.
    384           *
    385           *    - This field specifies the distance in rows between array
    386           *      slices.  It is used only in the following cases:
    387           *
    388           *          - Surface Array is enabled OR
    389           *          - Number of Mulitsamples is not NUMSAMPLES_1 and
    390           *            Multisampled Surface Storage Format set to MSFMT_MSS OR
    391           *          - Surface Type is SURFTYPE_CUBE
    392           */
    393          return ISL_ARRAY_PITCH_SPAN_COMPACT;
    394       } else if (ISL_DEV_GEN(dev) >= 7) {
    395          /* Note that Ivybridge introduces
    396           * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
    397           * driver more control over the QPitch.
    398           */
    399 
    400          if (phys_level0_sa->array_len == 1) {
    401             /* The hardware will never use the QPitch. So choose the most
    402              * compact QPitch possible in order to conserve memory.
    403              */
    404             return ISL_ARRAY_PITCH_SPAN_COMPACT;
    405          }
    406 
    407          if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
    408              (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
    409             /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
    410              * Section 6.18.4.7: Surface Arrays (p112):
    411              *
    412              *    If Surface Array Spacing is set to ARYSPC_FULL (note that
    413              *    the depth buffer and stencil buffer have an implied value of
    414              *    ARYSPC_FULL):
    415              */
    416             return ISL_ARRAY_PITCH_SPAN_FULL;
    417          }
    418 
    419          if (info->levels == 1) {
    420             /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
    421              * to ARYSPC_LOD0.
    422              */
    423             return ISL_ARRAY_PITCH_SPAN_COMPACT;
    424          }
    425 
    426          return ISL_ARRAY_PITCH_SPAN_FULL;
    427       } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
    428                  ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
    429                  isl_surf_usage_is_stencil(info->usage)) {
    430          /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
    431           * Graphics Core >> Section 7.18.3.7: Surface Arrays:
    432           *
    433           *    The separate stencil buffer does not support mip mapping, thus
    434           *    the storage for LODs other than LOD 0 is not needed.
    435           */
    436          assert(info->levels == 1);
    437          assert(phys_level0_sa->array_len == 1);
    438          return ISL_ARRAY_PITCH_SPAN_COMPACT;
    439       } else {
    440          if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
    441              ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
    442              isl_surf_usage_is_stencil(info->usage)) {
    443             /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
    444              * Graphics Core >> Section 7.18.3.7: Surface Arrays:
    445              *
    446              *    The separate stencil buffer does not support mip mapping,
    447              *    thus the storage for LODs other than LOD 0 is not needed.
    448              */
    449             assert(info->levels == 1);
    450             assert(phys_level0_sa->array_len == 1);
    451             return ISL_ARRAY_PITCH_SPAN_COMPACT;
    452          }
    453 
    454          if (phys_level0_sa->array_len == 1) {
    455             /* The hardware will never use the QPitch. So choose the most
    456              * compact QPitch possible in order to conserve memory.
    457              */
    458             return ISL_ARRAY_PITCH_SPAN_COMPACT;
    459          }
    460 
    461          return ISL_ARRAY_PITCH_SPAN_FULL;
    462       }
    463 
    464    case ISL_DIM_LAYOUT_GEN4_3D:
    465       /* The hardware will never use the QPitch. So choose the most
    466        * compact QPitch possible in order to conserve memory.
    467        */
    468       return ISL_ARRAY_PITCH_SPAN_COMPACT;
    469    }
    470 
    471    unreachable("bad isl_dim_layout");
    472    return ISL_ARRAY_PITCH_SPAN_FULL;
    473 }
    474 
    475 static void
    476 isl_choose_image_alignment_el(const struct isl_device *dev,
    477                               const struct isl_surf_init_info *restrict info,
    478                               enum isl_tiling tiling,
    479                               enum isl_dim_layout dim_layout,
    480                               enum isl_msaa_layout msaa_layout,
    481                               struct isl_extent3d *image_align_el)
    482 {
    483    if (info->format == ISL_FORMAT_HIZ) {
    484       assert(ISL_DEV_GEN(dev) >= 6);
    485       /* HiZ surfaces are always aligned to 16x8 pixels in the primary surface
    486        * which works out to 2x2 HiZ elments.
    487        */
    488       *image_align_el = isl_extent3d(2, 2, 1);
    489       return;
    490    }
    491 
    492    if (ISL_DEV_GEN(dev) >= 9) {
    493       isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
    494                                          msaa_layout, image_align_el);
    495    } else if (ISL_DEV_GEN(dev) >= 8) {
    496       isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
    497                                          msaa_layout, image_align_el);
    498    } else if (ISL_DEV_GEN(dev) >= 7) {
    499       isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout,
    500                                           msaa_layout, image_align_el);
    501    } else if (ISL_DEV_GEN(dev) >= 6) {
    502       isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout,
    503                                          msaa_layout, image_align_el);
    504    } else {
    505       isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout,
    506                                          msaa_layout, image_align_el);
    507    }
    508 }
    509 
    510 static enum isl_dim_layout
    511 isl_surf_choose_dim_layout(const struct isl_device *dev,
    512                            enum isl_surf_dim logical_dim,
    513                            enum isl_tiling tiling)
    514 {
    515    if (ISL_DEV_GEN(dev) >= 9) {
    516       switch (logical_dim) {
    517       case ISL_SURF_DIM_1D:
    518          /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
    519           *
    520           *    One-dimensional surfaces use a tiling mode of linear.
    521           *    Technically, they are not tiled resources, but the Tiled
    522           *    Resource Mode field in RENDER_SURFACE_STATE is still used to
    523           *    indicate the alignment requirements for this linear surface
    524           *    (See 1D Alignment requirements for how 4K and 64KB Tiled
    525           *    Resource Modes impact alignment). Alternatively, a 1D surface
    526           *    can be defined as a 2D tiled surface (e.g. TileY or TileX) with
    527           *    a height of 0.
    528           *
    529           * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear
    530           * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used.
    531           */
    532          if (tiling == ISL_TILING_LINEAR)
    533             return ISL_DIM_LAYOUT_GEN9_1D;
    534          else
    535             return ISL_DIM_LAYOUT_GEN4_2D;
    536       case ISL_SURF_DIM_2D:
    537       case ISL_SURF_DIM_3D:
    538          return ISL_DIM_LAYOUT_GEN4_2D;
    539       }
    540    } else {
    541       switch (logical_dim) {
    542       case ISL_SURF_DIM_1D:
    543       case ISL_SURF_DIM_2D:
    544          return ISL_DIM_LAYOUT_GEN4_2D;
    545       case ISL_SURF_DIM_3D:
    546          return ISL_DIM_LAYOUT_GEN4_3D;
    547       }
    548    }
    549 
    550    unreachable("bad isl_surf_dim");
    551    return ISL_DIM_LAYOUT_GEN4_2D;
    552 }
    553 
    554 /**
    555  * Calculate the physical extent of the surface's first level, in units of
    556  * surface samples. The result is aligned to the format's compression block.
    557  */
    558 static void
    559 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
    560                                const struct isl_surf_init_info *restrict info,
    561                                enum isl_dim_layout dim_layout,
    562                                enum isl_tiling tiling,
    563                                enum isl_msaa_layout msaa_layout,
    564                                struct isl_extent4d *phys_level0_sa)
    565 {
    566    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
    567 
    568    if (isl_format_is_yuv(info->format))
    569       isl_finishme("%s:%s: YUV format", __FILE__, __func__);
    570 
    571    switch (info->dim) {
    572    case ISL_SURF_DIM_1D:
    573       assert(info->height == 1);
    574       assert(info->depth == 1);
    575       assert(info->samples == 1);
    576 
    577       switch (dim_layout) {
    578       case ISL_DIM_LAYOUT_GEN4_3D:
    579          unreachable("bad isl_dim_layout");
    580 
    581       case ISL_DIM_LAYOUT_GEN9_1D:
    582       case ISL_DIM_LAYOUT_GEN4_2D:
    583          *phys_level0_sa = (struct isl_extent4d) {
    584             .w = isl_align_npot(info->width, fmtl->bw),
    585             .h = fmtl->bh,
    586             .d = 1,
    587             .a = info->array_len,
    588          };
    589          break;
    590       }
    591       break;
    592 
    593    case ISL_SURF_DIM_2D:
    594       assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D);
    595 
    596       if (tiling == ISL_TILING_Ys && info->samples > 1)
    597          isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
    598 
    599       switch (msaa_layout) {
    600       case ISL_MSAA_LAYOUT_NONE:
    601          assert(info->depth == 1);
    602          assert(info->samples == 1);
    603 
    604          *phys_level0_sa = (struct isl_extent4d) {
    605             .w = isl_align_npot(info->width, fmtl->bw),
    606             .h = isl_align_npot(info->height, fmtl->bh),
    607             .d = 1,
    608             .a = info->array_len,
    609          };
    610          break;
    611 
    612       case ISL_MSAA_LAYOUT_ARRAY:
    613          assert(info->depth == 1);
    614          assert(info->levels == 1);
    615          assert(isl_format_supports_multisampling(dev->info, info->format));
    616          assert(fmtl->bw == 1 && fmtl->bh == 1);
    617 
    618          *phys_level0_sa = (struct isl_extent4d) {
    619             .w = info->width,
    620             .h = info->height,
    621             .d = 1,
    622             .a = info->array_len * info->samples,
    623          };
    624          break;
    625 
    626       case ISL_MSAA_LAYOUT_INTERLEAVED:
    627          assert(info->depth == 1);
    628          assert(info->levels == 1);
    629          assert(isl_format_supports_multisampling(dev->info, info->format));
    630 
    631          *phys_level0_sa = (struct isl_extent4d) {
    632             .w = info->width,
    633             .h = info->height,
    634             .d = 1,
    635             .a = info->array_len,
    636          };
    637 
    638          isl_msaa_interleaved_scale_px_to_sa(info->samples,
    639                                              &phys_level0_sa->w,
    640                                              &phys_level0_sa->h);
    641 
    642          phys_level0_sa->w = isl_align(phys_level0_sa->w, fmtl->bw);
    643          phys_level0_sa->h = isl_align(phys_level0_sa->h, fmtl->bh);
    644          break;
    645       }
    646       break;
    647 
    648    case ISL_SURF_DIM_3D:
    649       assert(info->array_len == 1);
    650       assert(info->samples == 1);
    651 
    652       if (fmtl->bd > 1) {
    653          isl_finishme("%s:%s: compression block with depth > 1",
    654                       __FILE__, __func__);
    655       }
    656 
    657       switch (dim_layout) {
    658       case ISL_DIM_LAYOUT_GEN9_1D:
    659          unreachable("bad isl_dim_layout");
    660 
    661       case ISL_DIM_LAYOUT_GEN4_2D:
    662          assert(ISL_DEV_GEN(dev) >= 9);
    663 
    664          *phys_level0_sa = (struct isl_extent4d) {
    665             .w = isl_align_npot(info->width, fmtl->bw),
    666             .h = isl_align_npot(info->height, fmtl->bh),
    667             .d = 1,
    668             .a = info->depth,
    669          };
    670          break;
    671 
    672       case ISL_DIM_LAYOUT_GEN4_3D:
    673          assert(ISL_DEV_GEN(dev) < 9);
    674          *phys_level0_sa = (struct isl_extent4d) {
    675             .w = isl_align(info->width, fmtl->bw),
    676             .h = isl_align(info->height, fmtl->bh),
    677             .d = info->depth,
    678             .a = 1,
    679          };
    680          break;
    681       }
    682       break;
    683    }
    684 }
    685 
    686 /**
    687  * A variant of isl_calc_phys_slice0_extent_sa() specific to
    688  * ISL_DIM_LAYOUT_GEN4_2D.
    689  */
    690 static void
    691 isl_calc_phys_slice0_extent_sa_gen4_2d(
    692       const struct isl_device *dev,
    693       const struct isl_surf_init_info *restrict info,
    694       enum isl_msaa_layout msaa_layout,
    695       const struct isl_extent3d *image_align_sa,
    696       const struct isl_extent4d *phys_level0_sa,
    697       struct isl_extent2d *phys_slice0_sa)
    698 {
    699    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
    700 
    701    assert(phys_level0_sa->depth == 1);
    702 
    703    if (info->levels == 1) {
    704       /* Do not pad the surface to the image alignment. Instead, pad it only
    705        * to the pixel format's block alignment.
    706        *
    707        * For tiled surfaces, using a reduced alignment here avoids wasting CPU
    708        * cycles on the below mipmap layout caluclations. Reducing the
    709        * alignment here is safe because we later align the row pitch and array
    710        * pitch to the tile boundary. It is safe even for
    711        * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
    712        * to accomodate the interleaved samples.
    713        *
    714        * For linear surfaces, reducing the alignment here permits us to later
    715        * choose an arbitrary, non-aligned row pitch. If the surface backs
    716        * a VkBuffer, then an arbitrary pitch may be needed to accomodate
    717        * VkBufferImageCopy::bufferRowLength.
    718        */
    719       *phys_slice0_sa = (struct isl_extent2d) {
    720          .w = isl_align_npot(phys_level0_sa->w, fmtl->bw),
    721          .h = isl_align_npot(phys_level0_sa->h, fmtl->bh),
    722       };
    723       return;
    724    }
    725 
    726    uint32_t slice_top_w = 0;
    727    uint32_t slice_bottom_w = 0;
    728    uint32_t slice_left_h = 0;
    729    uint32_t slice_right_h = 0;
    730 
    731    uint32_t W0 = phys_level0_sa->w;
    732    uint32_t H0 = phys_level0_sa->h;
    733 
    734    for (uint32_t l = 0; l < info->levels; ++l) {
    735       uint32_t W = isl_minify(W0, l);
    736       uint32_t H = isl_minify(H0, l);
    737 
    738       uint32_t w = isl_align_npot(W, image_align_sa->w);
    739       uint32_t h = isl_align_npot(H, image_align_sa->h);
    740 
    741       if (l == 0) {
    742          slice_top_w = w;
    743          slice_left_h = h;
    744          slice_right_h = h;
    745       } else if (l == 1) {
    746          slice_bottom_w = w;
    747          slice_left_h += h;
    748       } else if (l == 2) {
    749          slice_bottom_w += w;
    750          slice_right_h += h;
    751       } else {
    752          slice_right_h += h;
    753       }
    754    }
    755 
    756    *phys_slice0_sa = (struct isl_extent2d) {
    757       .w = MAX(slice_top_w, slice_bottom_w),
    758       .h = MAX(slice_left_h, slice_right_h),
    759    };
    760 }
    761 
    762 /**
    763  * A variant of isl_calc_phys_slice0_extent_sa() specific to
    764  * ISL_DIM_LAYOUT_GEN4_3D.
    765  */
    766 static void
    767 isl_calc_phys_slice0_extent_sa_gen4_3d(
    768       const struct isl_device *dev,
    769       const struct isl_surf_init_info *restrict info,
    770       const struct isl_extent3d *image_align_sa,
    771       const struct isl_extent4d *phys_level0_sa,
    772       struct isl_extent2d *phys_slice0_sa)
    773 {
    774    assert(info->samples == 1);
    775    assert(phys_level0_sa->array_len == 1);
    776 
    777    uint32_t slice_w = 0;
    778    uint32_t slice_h = 0;
    779 
    780    uint32_t W0 = phys_level0_sa->w;
    781    uint32_t H0 = phys_level0_sa->h;
    782    uint32_t D0 = phys_level0_sa->d;
    783 
    784    for (uint32_t l = 0; l < info->levels; ++l) {
    785       uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
    786       uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
    787       uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d);
    788 
    789       uint32_t max_layers_horiz = MIN(level_d, 1u << l);
    790       uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
    791 
    792       slice_w = MAX(slice_w, level_w * max_layers_horiz);
    793       slice_h += level_h * max_layers_vert;
    794    }
    795 
    796    *phys_slice0_sa = (struct isl_extent2d) {
    797       .w = slice_w,
    798       .h = slice_h,
    799    };
    800 }
    801 
    802 /**
    803  * A variant of isl_calc_phys_slice0_extent_sa() specific to
    804  * ISL_DIM_LAYOUT_GEN9_1D.
    805  */
    806 static void
    807 isl_calc_phys_slice0_extent_sa_gen9_1d(
    808       const struct isl_device *dev,
    809       const struct isl_surf_init_info *restrict info,
    810       const struct isl_extent3d *image_align_sa,
    811       const struct isl_extent4d *phys_level0_sa,
    812       struct isl_extent2d *phys_slice0_sa)
    813 {
    814    MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
    815 
    816    assert(phys_level0_sa->height == 1);
    817    assert(phys_level0_sa->depth == 1);
    818    assert(info->samples == 1);
    819    assert(image_align_sa->w >= fmtl->bw);
    820 
    821    uint32_t slice_w = 0;
    822    const uint32_t W0 = phys_level0_sa->w;
    823 
    824    for (uint32_t l = 0; l < info->levels; ++l) {
    825       uint32_t W = isl_minify(W0, l);
    826       uint32_t w = isl_align_npot(W, image_align_sa->w);
    827 
    828       slice_w += w;
    829    }
    830 
    831    *phys_slice0_sa = isl_extent2d(slice_w, 1);
    832 }
    833 
    834 /**
    835  * Calculate the physical extent of the surface's first array slice, in units
    836  * of surface samples. If the surface is multi-leveled, then the result will
    837  * be aligned to \a image_align_sa.
    838  */
    839 static void
    840 isl_calc_phys_slice0_extent_sa(const struct isl_device *dev,
    841                                const struct isl_surf_init_info *restrict info,
    842                                enum isl_dim_layout dim_layout,
    843                                enum isl_msaa_layout msaa_layout,
    844                                const struct isl_extent3d *image_align_sa,
    845                                const struct isl_extent4d *phys_level0_sa,
    846                                struct isl_extent2d *phys_slice0_sa)
    847 {
    848    switch (dim_layout) {
    849    case ISL_DIM_LAYOUT_GEN9_1D:
    850       isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info,
    851                                              image_align_sa, phys_level0_sa,
    852                                              phys_slice0_sa);
    853       return;
    854    case ISL_DIM_LAYOUT_GEN4_2D:
    855       isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
    856                                              image_align_sa, phys_level0_sa,
    857                                              phys_slice0_sa);
    858       return;
    859    case ISL_DIM_LAYOUT_GEN4_3D:
    860       isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa,
    861                                              phys_level0_sa, phys_slice0_sa);
    862       return;
    863    }
    864 }
    865 
    866 /**
    867  * Calculate the pitch between physical array slices, in units of rows of
    868  * surface elements.
    869  */
    870 static uint32_t
    871 isl_calc_array_pitch_el_rows(const struct isl_device *dev,
    872                              const struct isl_surf_init_info *restrict info,
    873                              const struct isl_tile_info *tile_info,
    874                              enum isl_dim_layout dim_layout,
    875                              enum isl_array_pitch_span array_pitch_span,
    876                              const struct isl_extent3d *image_align_sa,
    877                              const struct isl_extent4d *phys_level0_sa,
    878                              const struct isl_extent2d *phys_slice0_sa)
    879 {
    880    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
    881    uint32_t pitch_sa_rows = 0;
    882 
    883    switch (dim_layout) {
    884    case ISL_DIM_LAYOUT_GEN9_1D:
    885       /* Each row is an array slice */
    886       pitch_sa_rows = 1;
    887       break;
    888    case ISL_DIM_LAYOUT_GEN4_2D:
    889       switch (array_pitch_span) {
    890       case ISL_ARRAY_PITCH_SPAN_COMPACT:
    891          pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
    892          break;
    893       case ISL_ARRAY_PITCH_SPAN_FULL: {
    894          /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
    895           * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
    896           * Surfaces >> Surface Arrays.
    897           */
    898          uint32_t H0_sa = phys_level0_sa->h;
    899          uint32_t H1_sa = isl_minify(H0_sa, 1);
    900 
    901          uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
    902          uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
    903 
    904          uint32_t m;
    905          if (ISL_DEV_GEN(dev) >= 7) {
    906             /* The QPitch equation changed slightly in Ivybridge. */
    907             m = 12;
    908          } else {
    909             m = 11;
    910          }
    911 
    912          pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
    913 
    914          if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
    915              (info->height % 4 == 1)) {
    916             /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
    917              * Graphics Core >> Section 7.18.3.7: Surface Arrays:
    918              *
    919              *    [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
    920              *    the value calculated in the equation above , for every
    921              *    other odd Surface Height starting from 1 i.e. 1,5,9,13.
    922              *
    923              * XXX(chadv): Is the errata natural corollary of the physical
    924              * layout of interleaved samples?
    925              */
    926             pitch_sa_rows += 4;
    927          }
    928 
    929          pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
    930          } /* end case */
    931          break;
    932       }
    933       break;
    934    case ISL_DIM_LAYOUT_GEN4_3D:
    935       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
    936       pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
    937       break;
    938    default:
    939       unreachable("bad isl_dim_layout");
    940       break;
    941    }
    942 
    943    assert(pitch_sa_rows % fmtl->bh == 0);
    944    uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
    945 
    946    if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) {
    947       /*
    948        * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
    949        *
    950        *    "Mip-mapped and arrayed surfaces are supported with MCS buffer
    951        *    layout with these alignments in the RT space: Horizontal
    952        *    Alignment = 128 and Vertical Alignment = 64."
    953        *
    954        * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
    955        *
    956        *    "For non-multisampled render target's CCS auxiliary surface,
    957        *    QPitch must be computed with Horizontal Alignment = 128 and
    958        *    Surface Vertical Alignment = 256. These alignments are only for
    959        *    CCS buffer and not for associated render target."
    960        *
    961        * The first restriction is already handled by isl_choose_image_alignment_el
    962        * but the second restriction, which is an extension of the first, only
    963        * applies to qpitch and must be applied here.
    964        */
    965       assert(fmtl->bh == 4);
    966       pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
    967    }
    968 
    969    if (ISL_DEV_GEN(dev) >= 9 &&
    970        info->dim == ISL_SURF_DIM_3D &&
    971        tile_info->tiling != ISL_TILING_LINEAR) {
    972       /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
    973        *
    974        *    Tile Mode != Linear: This field must be set to an integer multiple
    975        *    of the tile height
    976        */
    977       pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
    978    }
    979 
    980    return pitch_el_rows;
    981 }
    982 
    983 /**
    984  * Calculate the pitch of each surface row, in bytes.
    985  */
    986 static uint32_t
    987 isl_calc_linear_row_pitch(const struct isl_device *dev,
    988                           const struct isl_surf_init_info *restrict info,
    989                           const struct isl_extent2d *phys_slice0_sa)
    990 {
    991    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
    992 
    993    uint32_t row_pitch = info->min_pitch;
    994 
    995    /* First, align the surface to a cache line boundary, as the PRM explains
    996     * below.
    997     *
    998     * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
    999     * Formats >> Surface Padding Requirements >> Render Target and Media
   1000     * Surfaces:
   1001     *
   1002     *    The data port accesses data (pixels) outside of the surface if they
   1003     *    are contained in the same cache request as pixels that are within the
   1004     *    surface. These pixels will not be returned by the requesting message,
   1005     *    however if these pixels lie outside of defined pages in the GTT,
   1006     *    a GTT error will result when the cache request is processed. In order
   1007     *    to avoid these GTT errors, padding at the bottom of the surface is
   1008     *    sometimes necessary.
   1009     *
   1010     * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
   1011     * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces:
   1012     *
   1013     *    The sampling engine accesses texels outside of the surface if they
   1014     *    are contained in the same cache line as texels that are within the
   1015     *    surface.  These texels will not participate in any calculation
   1016     *    performed by the sampling engine and will not affect the result of
   1017     *    any sampling engine operation, however if these texels lie outside of
   1018     *    defined pages in the GTT, a GTT error will result when the cache line
   1019     *    is accessed. In order to avoid these GTT errors, padding at the
   1020     *    bottom and right side of a sampling engine surface is sometimes
   1021     *    necessary.
   1022     *
   1023     *    It is possible that a cache line will straddle a page boundary if the
   1024     *    base address or pitch is not aligned. All pages included in the cache
   1025     *    lines that are part of the surface must map to valid GTT entries to
   1026     *    avoid errors. To determine the necessary padding on the bottom and
   1027     *    right side of the surface, refer to the table in  Alignment Unit Size
   1028     *    section for the i and j parameters for the surface format in use. The
   1029     *    surface must then be extended to the next multiple of the alignment
   1030     *    unit size in each dimension, and all texels contained in this
   1031     *    extended surface must have valid GTT entries.
   1032     *
   1033     *    For example, suppose the surface size is 15 texels by 10 texels and
   1034     *    the alignment parameters are i=4 and j=2. In this case, the extended
   1035     *    surface would be 16 by 10. Note that these calculations are done in
   1036     *    texels, and must be converted to bytes based on the surface format
   1037     *    being used to determine whether additional pages need to be defined.
   1038     */
   1039    assert(phys_slice0_sa->w % fmtl->bw == 0);
   1040    const uint32_t bs = fmtl->bpb / 8;
   1041    row_pitch = MAX(row_pitch, bs * (phys_slice0_sa->w / fmtl->bw));
   1042 
   1043    /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
   1044     * RENDER_SURFACE_STATE Surface Pitch (p349):
   1045     *
   1046     *    - For linear render target surfaces and surfaces accessed with the
   1047     *      typed data port messages, the pitch must be a multiple of the
   1048     *      element size for non-YUV surface formats.  Pitch must be
   1049     *      a multiple of 2 * element size for YUV surface formats.
   1050     *
   1051     *    - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
   1052     *      ignore because isl doesn't do buffers.]
   1053     *
   1054     *    - For other linear surfaces, the pitch can be any multiple of
   1055     *      bytes.
   1056     */
   1057    if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
   1058       if (isl_format_is_yuv(info->format)) {
   1059          row_pitch = isl_align_npot(row_pitch, 2 * bs);
   1060       } else  {
   1061          row_pitch = isl_align_npot(row_pitch, bs);
   1062       }
   1063    }
   1064 
   1065    return row_pitch;
   1066 }
   1067 
   1068 /**
   1069  * Calculate and apply any padding required for the surface.
   1070  *
   1071  * @param[inout] total_h_el is updated with the new height
   1072  * @param[out] pad_bytes is overwritten with additional padding requirements.
   1073  */
   1074 static void
   1075 isl_apply_surface_padding(const struct isl_device *dev,
   1076                           const struct isl_surf_init_info *restrict info,
   1077                           const struct isl_tile_info *tile_info,
   1078                           uint32_t *total_h_el,
   1079                           uint32_t *pad_bytes)
   1080 {
   1081    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
   1082 
   1083    *pad_bytes = 0;
   1084 
   1085    /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
   1086     * Formats >> Surface Padding Requirements >> Render Target and Media
   1087     * Surfaces:
   1088     *
   1089     *   The data port accesses data (pixels) outside of the surface if they
   1090     *   are contained in the same cache request as pixels that are within the
   1091     *   surface. These pixels will not be returned by the requesting message,
   1092     *   however if these pixels lie outside of defined pages in the GTT,
   1093     *   a GTT error will result when the cache request is processed. In
   1094     *   order to avoid these GTT errors, padding at the bottom of the
   1095     *   surface is sometimes necessary.
   1096     *
   1097     * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
   1098     * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces:
   1099     *
   1100     *    ... Lots of padding requirements, all listed separately below.
   1101     */
   1102 
   1103    /* We can safely ignore the first padding requirement, quoted below,
   1104     * because isl doesn't do buffers.
   1105     *
   1106     *    - [pre-BDW] For buffers, which have no inherent height, padding
   1107     *      requirements are different. A buffer must be padded to the next
   1108     *      multiple of 256 array elements, with an additional 16 bytes added
   1109     *      beyond that to account for the L1 cache line.
   1110     */
   1111 
   1112    /*
   1113     *    - For compressed textures [...], padding at the bottom of the surface
   1114     *      is to an even compressed row.
   1115     */
   1116    if (isl_format_is_compressed(info->format))
   1117       *total_h_el = isl_align(*total_h_el, 2);
   1118 
   1119    /*
   1120     *    - For cube surfaces, an additional two rows of padding are required
   1121     *      at the bottom of the surface.
   1122     */
   1123    if (info->usage & ISL_SURF_USAGE_CUBE_BIT)
   1124       *total_h_el += 2;
   1125 
   1126    /*
   1127     *    - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats,
   1128     *      additional padding is required. These surfaces require an extra row
   1129     *      plus 16 bytes of padding at the bottom in addition to the general
   1130     *      padding requirements.
   1131     */
   1132    if (isl_format_is_yuv(info->format) &&
   1133        (fmtl->bpb == 96 || fmtl->bpb == 48|| fmtl->bpb == 24)) {
   1134       *total_h_el += 1;
   1135       *pad_bytes += 16;
   1136    }
   1137 
   1138    /*
   1139     *    - For linear surfaces, additional padding of 64 bytes is required at
   1140     *      the bottom of the surface. This is in addition to the padding
   1141     *      required above.
   1142     */
   1143    if (tile_info->tiling == ISL_TILING_LINEAR)
   1144       *pad_bytes += 64;
   1145 
   1146    /* The below text weakens, not strengthens, the padding requirements for
   1147     * linear surfaces. Therefore we can safely ignore it.
   1148     *
   1149     *    - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array,
   1150     *      non-MSAA, non-mip-mapped surfaces in linear memory, the only
   1151     *      padding requirement is to the next aligned 64-byte boundary beyond
   1152     *      the end of the surface. The rest of the padding requirements
   1153     *      documented above do not apply to these surfaces.
   1154     */
   1155 
   1156    /*
   1157     *    - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and
   1158     *      height % 4 != 0, the surface must be padded with
   1159     *      4-(height % 4)*Surface Pitch # of bytes.
   1160     */
   1161    if (ISL_DEV_GEN(dev) >= 9 &&
   1162        tile_info->tiling == ISL_TILING_LINEAR &&
   1163        (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) {
   1164       *total_h_el = isl_align(*total_h_el, 4);
   1165    }
   1166 
   1167    /*
   1168     *    - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded
   1169     *      to 4 times the Surface Pitch # of bytes
   1170     */
   1171    if (ISL_DEV_GEN(dev) >= 9 &&
   1172        tile_info->tiling == ISL_TILING_LINEAR &&
   1173        info->dim == ISL_SURF_DIM_1D) {
   1174       *total_h_el += 4;
   1175    }
   1176 }
   1177 
   1178 bool
   1179 isl_surf_init_s(const struct isl_device *dev,
   1180                 struct isl_surf *surf,
   1181                 const struct isl_surf_init_info *restrict info)
   1182 {
   1183    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
   1184 
   1185    const struct isl_extent4d logical_level0_px = {
   1186       .w = info->width,
   1187       .h = info->height,
   1188       .d = info->depth,
   1189       .a = info->array_len,
   1190    };
   1191 
   1192    enum isl_tiling tiling;
   1193    if (!isl_surf_choose_tiling(dev, info, &tiling))
   1194       return false;
   1195 
   1196    struct isl_tile_info tile_info;
   1197    if (!isl_tiling_get_info(dev, tiling, fmtl->bpb, &tile_info))
   1198       return false;
   1199 
   1200    const enum isl_dim_layout dim_layout =
   1201       isl_surf_choose_dim_layout(dev, info->dim, tiling);
   1202 
   1203    enum isl_msaa_layout msaa_layout;
   1204    if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
   1205        return false;
   1206 
   1207    struct isl_extent3d image_align_el;
   1208    isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
   1209                                  &image_align_el);
   1210 
   1211    struct isl_extent3d image_align_sa =
   1212       isl_extent3d_el_to_sa(info->format, image_align_el);
   1213 
   1214    struct isl_extent4d phys_level0_sa;
   1215    isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
   1216                                   &phys_level0_sa);
   1217    assert(phys_level0_sa.w % fmtl->bw == 0);
   1218    assert(phys_level0_sa.h % fmtl->bh == 0);
   1219 
   1220    enum isl_array_pitch_span array_pitch_span =
   1221       isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
   1222 
   1223    struct isl_extent2d phys_slice0_sa;
   1224    isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout,
   1225                                   &image_align_sa, &phys_level0_sa,
   1226                                   &phys_slice0_sa);
   1227    assert(phys_slice0_sa.w % fmtl->bw == 0);
   1228    assert(phys_slice0_sa.h % fmtl->bh == 0);
   1229 
   1230    const uint32_t array_pitch_el_rows =
   1231       isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout,
   1232                                    array_pitch_span, &image_align_sa,
   1233                                    &phys_level0_sa, &phys_slice0_sa);
   1234 
   1235    uint32_t total_h_el = phys_level0_sa.array_len * array_pitch_el_rows;
   1236 
   1237    uint32_t pad_bytes;
   1238    isl_apply_surface_padding(dev, info, &tile_info, &total_h_el, &pad_bytes);
   1239 
   1240    uint32_t row_pitch, size, base_alignment;
   1241    if (tiling == ISL_TILING_LINEAR) {
   1242       row_pitch = isl_calc_linear_row_pitch(dev, info, &phys_slice0_sa);
   1243       size = row_pitch * total_h_el + pad_bytes;
   1244 
   1245       /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
   1246        *
   1247        *    "The Base Address for linear render target surfaces and surfaces
   1248        *    accessed with the typed surface read/write data port messages must
   1249        *    be element-size aligned, for non-YUV surface formats, or a
   1250        *    multiple of 2 element-sizes for YUV surface formats. Other linear
   1251        *    surfaces have no alignment requirements (byte alignment is
   1252        *    sufficient.)"
   1253        */
   1254       base_alignment = MAX(1, info->min_alignment);
   1255       if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
   1256          if (isl_format_is_yuv(info->format)) {
   1257             base_alignment = MAX(base_alignment, fmtl->bpb / 4);
   1258          } else {
   1259             base_alignment = MAX(base_alignment, fmtl->bpb / 8);
   1260          }
   1261       }
   1262       base_alignment = isl_round_up_to_power_of_two(base_alignment);
   1263    } else {
   1264       assert(fmtl->bpb % tile_info.format_bpb == 0);
   1265       const uint32_t tile_el_scale = fmtl->bpb / tile_info.format_bpb;
   1266 
   1267       assert(phys_slice0_sa.w % fmtl->bw == 0);
   1268       const uint32_t total_w_el = phys_slice0_sa.width / fmtl->bw;
   1269       const uint32_t total_w_tl =
   1270          isl_align_div(total_w_el * tile_el_scale,
   1271                        tile_info.logical_extent_el.width);
   1272 
   1273       row_pitch = total_w_tl * tile_info.phys_extent_B.width;
   1274       if (row_pitch < info->min_pitch) {
   1275          row_pitch = isl_align_npot(info->min_pitch,
   1276                                     tile_info.phys_extent_B.width);
   1277       }
   1278 
   1279       total_h_el += isl_align_div_npot(pad_bytes, row_pitch);
   1280       const uint32_t total_h_tl =
   1281          isl_align_div(total_h_el, tile_info.logical_extent_el.height);
   1282 
   1283       size = total_h_tl * tile_info.phys_extent_B.height * row_pitch;
   1284 
   1285       const uint32_t tile_size = tile_info.phys_extent_B.width *
   1286                                  tile_info.phys_extent_B.height;
   1287       assert(isl_is_pow2(info->min_alignment) && isl_is_pow2(tile_size));
   1288       base_alignment = MAX(info->min_alignment, tile_size);
   1289    }
   1290 
   1291    *surf = (struct isl_surf) {
   1292       .dim = info->dim,
   1293       .dim_layout = dim_layout,
   1294       .msaa_layout = msaa_layout,
   1295       .tiling = tiling,
   1296       .format = info->format,
   1297 
   1298       .levels = info->levels,
   1299       .samples = info->samples,
   1300 
   1301       .image_alignment_el = image_align_el,
   1302       .logical_level0_px = logical_level0_px,
   1303       .phys_level0_sa = phys_level0_sa,
   1304 
   1305       .size = size,
   1306       .alignment = base_alignment,
   1307       .row_pitch = row_pitch,
   1308       .array_pitch_el_rows = array_pitch_el_rows,
   1309       .array_pitch_span = array_pitch_span,
   1310 
   1311       .usage = info->usage,
   1312    };
   1313 
   1314    return true;
   1315 }
   1316 
   1317 void
   1318 isl_surf_get_tile_info(const struct isl_device *dev,
   1319                        const struct isl_surf *surf,
   1320                        struct isl_tile_info *tile_info)
   1321 {
   1322    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
   1323    isl_tiling_get_info(dev, surf->tiling, fmtl->bpb, tile_info);
   1324 }
   1325 
   1326 void
   1327 isl_surf_get_hiz_surf(const struct isl_device *dev,
   1328                       const struct isl_surf *surf,
   1329                       struct isl_surf *hiz_surf)
   1330 {
   1331    assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));
   1332 
   1333    /* Multisampled depth is always interleaved */
   1334    assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
   1335           surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
   1336 
   1337    /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
   1338     *
   1339     *    "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
   1340     *    Target View Extent, and Depth Coordinate Offset X/Y of the
   1341     *    hierarchical depth buffer are inherited from the depth buffer. The
   1342     *    height and width of the hierarchical depth buffer that must be
   1343     *    allocated are computed by the following formulas, where HZ is the
   1344     *    hierarchical depth buffer and Z is the depth buffer. The Z_Height,
   1345     *    Z_Width, and Z_Depth values given in these formulas are those present
   1346     *    in 3DSTATE_DEPTH_BUFFER incremented by one.
   1347     *
   1348     *    "The value of Z_Height and Z_Width must each be multiplied by 2 before
   1349     *    being applied to the table below if Number of Multisamples is set to
   1350     *    NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
   1351     *    Z_Width must be multiplied by 4 before being applied to the table
   1352     *    below if Number of Multisamples is set to NUMSAMPLES_8."
   1353     *
   1354     * In the Sky Lake PRM, the second paragraph is replaced with this:
   1355     *
   1356     *    "The Z_Height and Z_Width values must equal those present in
   1357     *    3DSTATE_DEPTH_BUFFER incremented by one."
   1358     *
   1359     * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
   1360     * block corresponds to a region of 8x4 samples in the primary depth
   1361     * surface.  On Sky Lake, on the other hand, each HiZ block corresponds to
   1362     * a region of 8x4 pixels in the primary depth surface regardless of the
   1363     * number of samples.  The dimensions of a HiZ block in both pixels and
   1364     * samples are given in the table below:
   1365     *
   1366     *                    | SNB - BDW |     SKL+
   1367     *              ------+-----------+-------------
   1368     *                1x  |  8 x 4 sa |   8 x 4 sa
   1369     *               MSAA |  8 x 4 px |   8 x 4 px
   1370     *              ------+-----------+-------------
   1371     *                2x  |  8 x 4 sa |  16 x 4 sa
   1372     *               MSAA |  4 x 4 px |   8 x 4 px
   1373     *              ------+-----------+-------------
   1374     *                4x  |  8 x 4 sa |  16 x 8 sa
   1375     *               MSAA |  4 x 2 px |   8 x 4 px
   1376     *              ------+-----------+-------------
   1377     *                8x  |  8 x 4 sa |  32 x 8 sa
   1378     *               MSAA |  2 x 2 px |   8 x 4 px
   1379     *              ------+-----------+-------------
   1380     *               16x  |    N/A    | 32 x 16 sa
   1381     *               MSAA |    N/A    |  8 x  4 px
   1382     *              ------+-----------+-------------
   1383     *
   1384     * There are a number of different ways that this discrepency could be
   1385     * handled.  The way we have chosen is to simply make MSAA HiZ have the
   1386     * same number of samples as the parent surface pre-Sky Lake and always be
   1387     * single-sampled on Sky Lake and above.  Since the block sizes of
   1388     * compressed formats are given in samples, this neatly handles everything
   1389     * without the need for additional HiZ formats with different block sizes
   1390     * on SKL+.
   1391     */
   1392    const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples;
   1393 
   1394    isl_surf_init(dev, hiz_surf,
   1395                  .dim = surf->dim,
   1396                  .format = ISL_FORMAT_HIZ,
   1397                  .width = surf->logical_level0_px.width,
   1398                  .height = surf->logical_level0_px.height,
   1399                  .depth = surf->logical_level0_px.depth,
   1400                  .levels = surf->levels,
   1401                  .array_len = surf->logical_level0_px.array_len,
   1402                  .samples = samples,
   1403                  .usage = ISL_SURF_USAGE_HIZ_BIT,
   1404                  .tiling_flags = ISL_TILING_HIZ_BIT);
   1405 }
   1406 
   1407 void
   1408 isl_surf_get_mcs_surf(const struct isl_device *dev,
   1409                       const struct isl_surf *surf,
   1410                       struct isl_surf *mcs_surf)
   1411 {
   1412    /* It must be multisampled with an array layout */
   1413    assert(surf->samples > 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
   1414 
   1415    /* The following are true of all multisampled surfaces */
   1416    assert(surf->dim == ISL_SURF_DIM_2D);
   1417    assert(surf->levels == 1);
   1418    assert(surf->logical_level0_px.depth == 1);
   1419 
   1420    enum isl_format mcs_format;
   1421    switch (surf->samples) {
   1422    case 2:  mcs_format = ISL_FORMAT_MCS_2X;  break;
   1423    case 4:  mcs_format = ISL_FORMAT_MCS_4X;  break;
   1424    case 8:  mcs_format = ISL_FORMAT_MCS_8X;  break;
   1425    case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
   1426    default:
   1427       unreachable("Invalid sample count");
   1428    }
   1429 
   1430    isl_surf_init(dev, mcs_surf,
   1431                  .dim = ISL_SURF_DIM_2D,
   1432                  .format = mcs_format,
   1433                  .width = surf->logical_level0_px.width,
   1434                  .height = surf->logical_level0_px.height,
   1435                  .depth = 1,
   1436                  .levels = 1,
   1437                  .array_len = surf->logical_level0_px.array_len,
   1438                  .samples = 1, /* MCS surfaces are really single-sampled */
   1439                  .usage = ISL_SURF_USAGE_MCS_BIT,
   1440                  .tiling_flags = ISL_TILING_Y0_BIT);
   1441 }
   1442 
   1443 bool
   1444 isl_surf_get_ccs_surf(const struct isl_device *dev,
   1445                       const struct isl_surf *surf,
   1446                       struct isl_surf *ccs_surf)
   1447 {
   1448    assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE);
   1449    assert(ISL_DEV_GEN(dev) >= 7);
   1450 
   1451    if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
   1452       return false;
   1453 
   1454    if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
   1455       return false;
   1456 
   1457    if (isl_format_is_compressed(surf->format))
   1458       return false;
   1459 
   1460    /* TODO: More conditions where it can fail. */
   1461 
   1462    enum isl_format ccs_format;
   1463    if (ISL_DEV_GEN(dev) >= 9) {
   1464       if (!isl_tiling_is_any_y(surf->tiling))
   1465          return false;
   1466 
   1467       switch (isl_format_get_layout(surf->format)->bpb) {
   1468       case 32:    ccs_format = ISL_FORMAT_GEN9_CCS_32BPP;   break;
   1469       case 64:    ccs_format = ISL_FORMAT_GEN9_CCS_64BPP;   break;
   1470       case 128:   ccs_format = ISL_FORMAT_GEN9_CCS_128BPP;  break;
   1471       default:
   1472          return false;
   1473       }
   1474    } else if (surf->tiling == ISL_TILING_Y0) {
   1475       switch (isl_format_get_layout(surf->format)->bpb) {
   1476       case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y;    break;
   1477       case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y;    break;
   1478       case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y;   break;
   1479       default:
   1480          return false;
   1481       }
   1482    } else if (surf->tiling == ISL_TILING_X) {
   1483       switch (isl_format_get_layout(surf->format)->bpb) {
   1484       case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X;    break;
   1485       case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X;    break;
   1486       case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X;   break;
   1487       default:
   1488          return false;
   1489       }
   1490    } else {
   1491       return false;
   1492    }
   1493 
   1494    isl_surf_init(dev, ccs_surf,
   1495                  .dim = surf->dim,
   1496                  .format = ccs_format,
   1497                  .width = surf->logical_level0_px.width,
   1498                  .height = surf->logical_level0_px.height,
   1499                  .depth = surf->logical_level0_px.depth,
   1500                  .levels = surf->levels,
   1501                  .array_len = surf->logical_level0_px.array_len,
   1502                  .samples = 1,
   1503                  .usage = ISL_SURF_USAGE_CCS_BIT,
   1504                  .tiling_flags = ISL_TILING_CCS_BIT);
   1505 
   1506    return true;
   1507 }
   1508 
   1509 void
   1510 isl_surf_fill_state_s(const struct isl_device *dev, void *state,
   1511                       const struct isl_surf_fill_state_info *restrict info)
   1512 {
   1513 #ifndef NDEBUG
   1514    isl_surf_usage_flags_t _base_usage =
   1515       info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
   1516                            ISL_SURF_USAGE_TEXTURE_BIT |
   1517                            ISL_SURF_USAGE_STORAGE_BIT);
   1518    /* They may only specify one of the above bits at a time */
   1519    assert(__builtin_popcount(_base_usage) == 1);
   1520    /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
   1521    assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
   1522 #endif
   1523 
   1524    if (info->surf->dim == ISL_SURF_DIM_3D) {
   1525       assert(info->view->base_array_layer + info->view->array_len <=
   1526              info->surf->logical_level0_px.depth);
   1527    } else {
   1528       assert(info->view->base_array_layer + info->view->array_len <=
   1529              info->surf->logical_level0_px.array_len);
   1530    }
   1531 
   1532    switch (ISL_DEV_GEN(dev)) {
   1533    case 4:
   1534       if (ISL_DEV_IS_G4X(dev)) {
   1535          /* G45 surface state is the same as gen5 */
   1536          isl_gen5_surf_fill_state_s(dev, state, info);
   1537       } else {
   1538          isl_gen4_surf_fill_state_s(dev, state, info);
   1539       }
   1540       break;
   1541    case 5:
   1542       isl_gen5_surf_fill_state_s(dev, state, info);
   1543       break;
   1544    case 6:
   1545       isl_gen6_surf_fill_state_s(dev, state, info);
   1546       break;
   1547    case 7:
   1548       if (ISL_DEV_IS_HASWELL(dev)) {
   1549          isl_gen75_surf_fill_state_s(dev, state, info);
   1550       } else {
   1551          isl_gen7_surf_fill_state_s(dev, state, info);
   1552       }
   1553       break;
   1554    case 8:
   1555       isl_gen8_surf_fill_state_s(dev, state, info);
   1556       break;
   1557    case 9:
   1558       isl_gen9_surf_fill_state_s(dev, state, info);
   1559       break;
   1560    default:
   1561       assert(!"Cannot fill surface state for this gen");
   1562    }
   1563 }
   1564 
   /**
    * Fill out a buffer surface state by forwarding to the implementation for
    * the hardware generation reported by ISL_DEV_GEN(dev).
    *
    * An unhandled generation trips an assertion in debug builds.
    */
   void
   isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
                           const struct isl_buffer_fill_state_info *restrict info)
   {
      switch (ISL_DEV_GEN(dev)) {
      case 4:
      case 5:
         /* Gen 4-5 are all the same when it comes to buffer surfaces */
         isl_gen5_buffer_fill_state_s(state, info);
         break;
      case 6:
         isl_gen6_buffer_fill_state_s(state, info);
         break;
      case 7:
         /* Haswell has its own variant of the gen7 packing routine. */
         if (!ISL_DEV_IS_HASWELL(dev)) {
            isl_gen7_buffer_fill_state_s(state, info);
         } else {
            isl_gen75_buffer_fill_state_s(state, info);
         }
         break;
      case 8:
         isl_gen8_buffer_fill_state_s(state, info);
         break;
      case 9:
         isl_gen9_buffer_fill_state_s(state, info);
         break;
      default:
         assert(!"Cannot fill surface state for this gen");
      }
   }
   1595 
   1596 /**
   1597  * A variant of isl_surf_get_image_offset_sa() specific to
   1598  * ISL_DIM_LAYOUT_GEN4_2D.
   1599  */
   1600 static void
   1601 get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
   1602                             uint32_t level, uint32_t logical_array_layer,
   1603                             uint32_t *x_offset_sa,
   1604                             uint32_t *y_offset_sa)
   1605 {
   1606    assert(level < surf->levels);
   1607    if (surf->dim == ISL_SURF_DIM_3D)
   1608       assert(logical_array_layer < surf->logical_level0_px.depth);
   1609    else
   1610       assert(logical_array_layer < surf->logical_level0_px.array_len);
   1611 
   1612    const struct isl_extent3d image_align_sa =
   1613       isl_surf_get_image_alignment_sa(surf);
   1614 
   1615    const uint32_t W0 = surf->phys_level0_sa.width;
   1616    const uint32_t H0 = surf->phys_level0_sa.height;
   1617 
   1618    const uint32_t phys_layer = logical_array_layer *
   1619       (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
   1620 
   1621    uint32_t x = 0;
   1622    uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
   1623 
   1624    for (uint32_t l = 0; l < level; ++l) {
   1625       if (l == 1) {
   1626          uint32_t W = isl_minify(W0, l);
   1627          x += isl_align_npot(W, image_align_sa.w);
   1628       } else {
   1629          uint32_t H = isl_minify(H0, l);
   1630          y += isl_align_npot(H, image_align_sa.h);
   1631       }
   1632    }
   1633 
   1634    *x_offset_sa = x;
   1635    *y_offset_sa = y;
   1636 }
   1637 
   1638 /**
   1639  * A variant of isl_surf_get_image_offset_sa() specific to
   1640  * ISL_DIM_LAYOUT_GEN4_3D.
   1641  */
   1642 static void
   1643 get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
   1644                             uint32_t level, uint32_t logical_z_offset_px,
   1645                             uint32_t *x_offset_sa,
   1646                             uint32_t *y_offset_sa)
   1647 {
   1648    assert(level < surf->levels);
   1649    assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
   1650    assert(surf->phys_level0_sa.array_len == 1);
   1651 
   1652    const struct isl_extent3d image_align_sa =
   1653       isl_surf_get_image_alignment_sa(surf);
   1654 
   1655    const uint32_t W0 = surf->phys_level0_sa.width;
   1656    const uint32_t H0 = surf->phys_level0_sa.height;
   1657    const uint32_t D0 = surf->phys_level0_sa.depth;
   1658 
   1659    uint32_t x = 0;
   1660    uint32_t y = 0;
   1661 
   1662    for (uint32_t l = 0; l < level; ++l) {
   1663       const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
   1664       const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d);
   1665       const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
   1666 
   1667       y += level_h * max_layers_vert;
   1668    }
   1669 
   1670    const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
   1671    const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
   1672    const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d);
   1673 
   1674    const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
   1675 
   1676    x += level_w * (logical_z_offset_px % max_layers_horiz);
   1677    y += level_h * (logical_z_offset_px / max_layers_horiz);
   1678 
   1679    *x_offset_sa = x;
   1680    *y_offset_sa = y;
   1681 }
   1682 
   1683 /**
   1684  * A variant of isl_surf_get_image_offset_sa() specific to
   1685  * ISL_DIM_LAYOUT_GEN9_1D.
   1686  */
   1687 static void
   1688 get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
   1689                             uint32_t level, uint32_t layer,
   1690                             uint32_t *x_offset_sa,
   1691                             uint32_t *y_offset_sa)
   1692 {
   1693    assert(level < surf->levels);
   1694    assert(layer < surf->phys_level0_sa.array_len);
   1695    assert(surf->phys_level0_sa.height == 1);
   1696    assert(surf->phys_level0_sa.depth == 1);
   1697    assert(surf->samples == 1);
   1698 
   1699    const uint32_t W0 = surf->phys_level0_sa.width;
   1700    const struct isl_extent3d image_align_sa =
   1701       isl_surf_get_image_alignment_sa(surf);
   1702 
   1703    uint32_t x = 0;
   1704 
   1705    for (uint32_t l = 0; l < level; ++l) {
   1706       uint32_t W = isl_minify(W0, l);
   1707       uint32_t w = isl_align_npot(W, image_align_sa.w);
   1708 
   1709       x += w;
   1710    }
   1711 
   1712    *x_offset_sa = x;
   1713    *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
   1714 }
   1715 
   1716 /**
   1717  * Calculate the offset, in units of surface samples, to a subimage in the
   1718  * surface.
   1719  *
   1720  * @invariant level < surface levels
   1721  * @invariant logical_array_layer < logical array length of surface
   1722  * @invariant logical_z_offset_px < logical depth of surface at level
   1723  */
   1724 void
   1725 isl_surf_get_image_offset_sa(const struct isl_surf *surf,
   1726                              uint32_t level,
   1727                              uint32_t logical_array_layer,
   1728                              uint32_t logical_z_offset_px,
   1729                              uint32_t *x_offset_sa,
   1730                              uint32_t *y_offset_sa)
   1731 {
   1732    assert(level < surf->levels);
   1733    assert(logical_array_layer < surf->logical_level0_px.array_len);
   1734    assert(logical_z_offset_px
   1735           < isl_minify(surf->logical_level0_px.depth, level));
   1736 
   1737    switch (surf->dim_layout) {
   1738    case ISL_DIM_LAYOUT_GEN9_1D:
   1739       get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
   1740                                   x_offset_sa, y_offset_sa);
   1741       break;
   1742    case ISL_DIM_LAYOUT_GEN4_2D:
   1743       get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
   1744                                   + logical_z_offset_px,
   1745                                   x_offset_sa, y_offset_sa);
   1746       break;
   1747    case ISL_DIM_LAYOUT_GEN4_3D:
   1748       get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px,
   1749                                   x_offset_sa, y_offset_sa);
   1750       break;
   1751 
   1752    default:
   1753       unreachable("not reached");
   1754    }
   1755 }
   1756 
   1757 void
   1758 isl_surf_get_image_offset_el(const struct isl_surf *surf,
   1759                              uint32_t level,
   1760                              uint32_t logical_array_layer,
   1761                              uint32_t logical_z_offset_px,
   1762                              uint32_t *x_offset_el,
   1763                              uint32_t *y_offset_el)
   1764 {
   1765    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
   1766 
   1767    assert(level < surf->levels);
   1768    assert(logical_array_layer < surf->logical_level0_px.array_len);
   1769    assert(logical_z_offset_px
   1770           < isl_minify(surf->logical_level0_px.depth, level));
   1771 
   1772    uint32_t x_offset_sa, y_offset_sa;
   1773    isl_surf_get_image_offset_sa(surf, level,
   1774                                 logical_array_layer,
   1775                                 logical_z_offset_px,
   1776                                 &x_offset_sa,
   1777                                 &y_offset_sa);
   1778 
   1779    *x_offset_el = x_offset_sa / fmtl->bw;
   1780    *y_offset_el = y_offset_sa / fmtl->bh;
   1781 }
   1782 
   1783 void
   1784 isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
   1785                                    enum isl_tiling tiling,
   1786                                    uint8_t bs,
   1787                                    uint32_t row_pitch,
   1788                                    uint32_t total_x_offset_el,
   1789                                    uint32_t total_y_offset_el,
   1790                                    uint32_t *base_address_offset,
   1791                                    uint32_t *x_offset_el,
   1792                                    uint32_t *y_offset_el)
   1793 {
   1794    if (tiling == ISL_TILING_LINEAR) {
   1795       *base_address_offset = total_y_offset_el * row_pitch +
   1796                              total_x_offset_el * bs;
   1797       *x_offset_el = 0;
   1798       *y_offset_el = 0;
   1799       return;
   1800    }
   1801 
   1802    const uint32_t bpb = bs * 8;
   1803 
   1804    struct isl_tile_info tile_info;
   1805    isl_tiling_get_info(dev, tiling, bpb, &tile_info);
   1806 
   1807    assert(row_pitch % tile_info.phys_extent_B.width == 0);
   1808 
   1809    /* For non-power-of-two formats, we need the address to be both tile and
   1810     * element-aligned.  The easiest way to achieve this is to work with a tile
   1811     * that is three times as wide as the regular tile.
   1812     *
   1813     * The tile info returned by get_tile_info has a logical size that is an
   1814     * integer number of tile_info.format_bpb size elements.  To scale the
   1815     * tile, we scale up the physical width and then treat the logical tile
   1816     * size as if it has bpb size elements.
   1817     */
   1818    const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
   1819    tile_info.phys_extent_B.width *= tile_el_scale;
   1820 
   1821    /* Compute the offset into the tile */
   1822    *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
   1823    *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
   1824 
   1825    /* Compute the offset of the tile in units of whole tiles */
   1826    uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
   1827    uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
   1828 
   1829    *base_address_offset =
   1830       y_offset_tl * tile_info.phys_extent_B.h * row_pitch +
   1831       x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
   1832 }
   1833 
   1834 uint32_t
   1835 isl_surf_get_depth_format(const struct isl_device *dev,
   1836                           const struct isl_surf *surf)
   1837 {
   1838    /* Support for separate stencil buffers began in gen5. Support for
   1839     * interleaved depthstencil buffers ceased in gen7. The intermediate gens,
   1840     * those that supported separate and interleaved stencil, were gen5 and
   1841     * gen6.
   1842     *
   1843     * For a list of all available formats, see the Sandybridge PRM >> Volume
   1844     * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
   1845     * Format (p321).
   1846     */
   1847 
   1848    bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
   1849 
   1850    assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
   1851 
   1852    if (has_stencil)
   1853       assert(ISL_DEV_GEN(dev) < 7);
   1854 
   1855    switch (surf->format) {
   1856    default:
   1857       unreachable("bad isl depth format");
   1858    case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
   1859       assert(ISL_DEV_GEN(dev) < 7);
   1860       return 0; /* D32_FLOAT_S8X24_UINT */
   1861    case ISL_FORMAT_R32_FLOAT:
   1862       assert(!has_stencil);
   1863       return 1; /* D32_FLOAT */
   1864    case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
   1865       if (has_stencil) {
   1866          assert(ISL_DEV_GEN(dev) < 7);
   1867          return 2; /* D24_UNORM_S8_UINT */
   1868       } else {
   1869          assert(ISL_DEV_GEN(dev) >= 5);
   1870          return 3; /* D24_UNORM_X8_UINT */
   1871       }
   1872    case ISL_FORMAT_R16_UNORM:
   1873       assert(!has_stencil);
   1874       return 5; /* D16_UNORM */
   1875    }
   1876 }
   1877