Home | History | Annotate | Download | only in core
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 2014 LunarG, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     22  * DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors:
     25  *    Chia-I Wu <olv (at) lunarg.com>
     26  */
     27 
     28 #include "ilo_debug.h"
     29 #include "ilo_image.h"
     30 
     31 enum {
     32    IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE,
     33    IMAGE_TILING_X    = 1 << GEN6_TILING_X,
     34    IMAGE_TILING_Y    = 1 << GEN6_TILING_Y,
     35    IMAGE_TILING_W    = 1 << GEN8_TILING_W,
     36 
     37    IMAGE_TILING_ALL  = (IMAGE_TILING_NONE |
     38                         IMAGE_TILING_X |
     39                         IMAGE_TILING_Y |
     40                         IMAGE_TILING_W)
     41 };
     42 
     43 struct ilo_image_layout {
     44    enum ilo_image_walk_type walk;
     45    bool interleaved_samples;
     46 
     47    uint8_t valid_tilings;
     48    enum gen_surface_tiling tiling;
     49 
     50    enum ilo_image_aux_type aux;
     51 
     52    int align_i;
     53    int align_j;
     54 
     55    struct ilo_image_lod *lods;
     56    int walk_layer_h0;
     57    int walk_layer_h1;
     58    int walk_layer_height;
     59    int monolithic_width;
     60    int monolithic_height;
     61 };
     62 
     63 static enum ilo_image_walk_type
     64 image_get_gen6_walk(const struct ilo_dev *dev,
     65                     const struct ilo_image_info *info)
     66 {
     67    ILO_DEV_ASSERT(dev, 6, 6);
     68 
     69    /* TODO we want LODs to be page-aligned */
     70    if (info->type == GEN6_SURFTYPE_3D)
     71       return ILO_IMAGE_WALK_3D;
     72 
     73    /*
     74     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
     75     *
     76     *     "The separate stencil buffer does not support mip mapping, thus the
     77     *      storage for LODs other than LOD 0 is not needed. The following
     78     *      QPitch equation applies only to the separate stencil buffer:
     79     *
     80     *        QPitch = h_0"
     81     *
     82     * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels
     83     * when bound.
     84     */
     85    if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT)
     86       return ILO_IMAGE_WALK_LOD;
     87 
     88    /* compact spacing is not supported otherwise */
     89    return ILO_IMAGE_WALK_LAYER;
     90 }
     91 
     92 static enum ilo_image_walk_type
     93 image_get_gen7_walk(const struct ilo_dev *dev,
     94                     const struct ilo_image_info *info)
     95 {
     96    ILO_DEV_ASSERT(dev, 7, 8);
     97 
     98    if (info->type == GEN6_SURFTYPE_3D)
     99       return ILO_IMAGE_WALK_3D;
    100 
    101    /*
    102     * From the Ivy Bridge PRM, volume 1 part 1, page 111:
    103     *
    104     *     "note that the depth buffer and stencil buffer have an implied value
    105     *      of ARYSPC_FULL"
    106     *
    107     * From the Ivy Bridge PRM, volume 4 part 1, page 66:
    108     *
    109     *     "If Multisampled Surface Storage Format is MSFMT_MSS and Number of
    110     *      Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array
    111     *      Spacing) must be set to ARYSPC_LOD0."
    112     */
    113    if (info->sample_count > 1)
    114       assert(info->level_count == 1);
    115    return (info->bind_zs || info->level_count > 1) ?
    116       ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD;
    117 }
    118 
    119 static bool
    120 image_get_gen6_interleaved_samples(const struct ilo_dev *dev,
    121                                    const struct ilo_image_info *info)
    122 {
    123    ILO_DEV_ASSERT(dev, 6, 8);
    124 
    125    /*
    126     * Gen6 supports only interleaved samples.  It is not explicitly stated,
    127     * but on Gen7+, render targets are expected to be UMS/CMS (samples
    128     * non-interleaved) and depth/stencil buffers are expected to be IMS
    129     * (samples interleaved).
    130     *
    131     * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
    132     */
    133    return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs);
    134 }
    135 
    136 static uint8_t
    137 image_get_gen6_valid_tilings(const struct ilo_dev *dev,
    138                              const struct ilo_image_info *info)
    139 {
    140    uint8_t valid_tilings = IMAGE_TILING_ALL;
    141 
    142    ILO_DEV_ASSERT(dev, 6, 8);
    143 
    144    if (info->valid_tilings)
    145       valid_tilings &= info->valid_tilings;
    146 
    147    /*
    148     * From the Sandy Bridge PRM, volume 1 part 2, page 32:
    149     *
    150     *     "Display/Overlay   Y-Major not supported.
    151     *                        X-Major required for Async Flips"
    152     */
    153    if (unlikely(info->bind_scanout))
    154       valid_tilings &= IMAGE_TILING_X;
    155 
    156    /*
    157     * From the Sandy Bridge PRM, volume 3 part 2, page 158:
    158     *
    159     *     "The cursor surface address must be 4K byte aligned. The cursor must
    160     *      be in linear memory, it cannot be tiled."
    161     */
    162    if (unlikely(info->bind_cursor))
    163       valid_tilings &= IMAGE_TILING_NONE;
    164 
    165    /*
    166     * From the Sandy Bridge PRM, volume 2 part 1, page 318:
    167     *
    168     *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
    169     *      Depth Buffer is not supported."
    170     *
    171     *     "The Depth Buffer, if tiled, must use Y-Major tiling."
    172     *
    173     * From the Sandy Bridge PRM, volume 1 part 2, page 22:
    174     *
    175     *     "W-Major Tile Format is used for separate stencil."
    176     */
    177    if (info->bind_zs) {
    178       if (info->format == GEN6_FORMAT_R8_UINT)
    179          valid_tilings &= IMAGE_TILING_W;
    180       else
    181          valid_tilings &= IMAGE_TILING_Y;
    182    }
    183 
    184    if (info->bind_surface_sampler ||
    185        info->bind_surface_dp_render ||
    186        info->bind_surface_dp_typed) {
    187       /*
    188        * From the Haswell PRM, volume 2d, page 233:
    189        *
    190        *     "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    191        *      (Tiled Surface) must be TRUE."
    192        */
    193       if (info->sample_count > 1)
    194          valid_tilings &= ~IMAGE_TILING_NONE;
    195 
    196       if (ilo_dev_gen(dev) < ILO_GEN(8))
    197          valid_tilings &= ~IMAGE_TILING_W;
    198    }
    199 
    200    if (info->bind_surface_dp_render) {
    201       /*
    202        * From the Sandy Bridge PRM, volume 1 part 2, page 32:
    203        *
    204        *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
    205        *      either TileX or Linear."
    206        *
    207        * From the Haswell PRM, volume 5, page 32:
    208        *
    209        *     "NOTE: 128 BPP format color buffer (render target) supports
    210        *      Linear, TiledX and TiledY."
    211        */
    212       if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->block_size == 16)
    213          valid_tilings &= ~IMAGE_TILING_Y;
    214 
    215       /*
    216        * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    217        *
    218        *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
    219        *      for all tiled Y Render Target surfaces."
    220        *
    221        *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    222        *
    223        * R32G32B32_FLOAT is not renderable and we only need an assert() here.
    224        */
    225       if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
    226          assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
    227    }
    228 
    229    return valid_tilings;
    230 }
    231 
    232 static uint64_t
    233 image_get_gen6_estimated_size(const struct ilo_dev *dev,
    234                               const struct ilo_image_info *info)
    235 {
    236    /* padding not considered */
    237    const uint64_t slice_size = info->width * info->height *
    238       info->block_size / (info->block_width * info->block_height);
    239    const uint64_t slice_count =
    240       info->depth * info->array_size * info->sample_count;
    241    const uint64_t estimated_size = slice_size * slice_count;
    242 
    243    ILO_DEV_ASSERT(dev, 6, 8);
    244 
    245    if (info->level_count == 1)
    246       return estimated_size;
    247    else
    248       return estimated_size * 4 / 3;
    249 }
    250 
    251 static enum gen_surface_tiling
    252 image_get_gen6_tiling(const struct ilo_dev *dev,
    253                       const struct ilo_image_info *info,
    254                       uint8_t valid_tilings)
    255 {
    256    ILO_DEV_ASSERT(dev, 6, 8);
    257 
    258    switch (valid_tilings) {
    259    case IMAGE_TILING_NONE:
    260       return GEN6_TILING_NONE;
    261    case IMAGE_TILING_X:
    262       return GEN6_TILING_X;
    263    case IMAGE_TILING_Y:
    264       return GEN6_TILING_Y;
    265    case IMAGE_TILING_W:
    266       return GEN8_TILING_W;
    267    default:
    268       break;
    269    }
    270 
    271    /*
    272     * X-tiling has the property that vertically adjacent pixels are usually in
    273     * the same page.  When the image size is less than a page, the image
    274     * height is 1, or when the image is not accessed in blocks, there is no
    275     * reason to tile.
    276     *
    277     * Y-tiling is similar, where vertically adjacent pixels are usually in the
    278     * same cacheline.
    279     */
    280    if (valid_tilings & IMAGE_TILING_NONE) {
    281       const uint64_t estimated_size =
    282          image_get_gen6_estimated_size(dev, info);
    283 
    284       if (info->height == 1 || !(info->bind_surface_sampler ||
    285                                  info->bind_surface_dp_render ||
    286                                  info->bind_surface_dp_typed))
    287          return GEN6_TILING_NONE;
    288 
    289       if (estimated_size <= 64 || (info->prefer_linear_threshold &&
    290                estimated_size > info->prefer_linear_threshold))
    291          return GEN6_TILING_NONE;
    292 
    293       if (estimated_size <= 2048)
    294          valid_tilings &= ~IMAGE_TILING_X;
    295    }
    296 
    297    return (valid_tilings & IMAGE_TILING_Y) ? GEN6_TILING_Y :
    298           (valid_tilings & IMAGE_TILING_X) ? GEN6_TILING_X :
    299           GEN6_TILING_NONE;
    300 }
    301 
    302 static bool
    303 image_get_gen6_hiz_enable(const struct ilo_dev *dev,
    304                           const struct ilo_image_info *info)
    305 {
    306    ILO_DEV_ASSERT(dev, 6, 8);
    307 
    308    /* depth buffer? */
    309    if (!info->bind_zs ||
    310        info->format == GEN6_FORMAT_R8_UINT ||
    311        info->interleaved_stencil)
    312       return false;
    313 
    314    /* we want to be able to force 8x4 alignments */
    315    if (info->type == GEN6_SURFTYPE_1D)
    316       return false;
    317 
    318    if (info->aux_disable)
    319       return false;
    320 
    321    if (ilo_debug & ILO_DEBUG_NOHIZ)
    322       return false;
    323 
    324    return true;
    325 }
    326 
    327 static bool
    328 image_get_gen7_mcs_enable(const struct ilo_dev *dev,
    329                           const struct ilo_image_info *info,
    330                           enum gen_surface_tiling tiling)
    331 {
    332    ILO_DEV_ASSERT(dev, 7, 8);
    333 
    334    if (!info->bind_surface_sampler && !info->bind_surface_dp_render)
    335       return false;
    336 
    337    /*
    338     * From the Ivy Bridge PRM, volume 4 part 1, page 77:
    339     *
    340     *     "For Render Target and Sampling Engine Surfaces:If the surface is
    341     *      multisampled (Number of Multisamples any value other than
    342     *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
    343     *
    344     *     "This field must be set to 0 for all SINT MSRTs when all RT channels
    345     *      are not written"
    346     */
    347    if (info->sample_count > 1) {
    348       if (ilo_dev_gen(dev) < ILO_GEN(8))
    349          assert(!info->is_integer);
    350       return true;
    351    }
    352 
    353    if (info->aux_disable)
    354       return false;
    355 
    356    /*
    357     * From the Ivy Bridge PRM, volume 2 part 1, page 326:
    358     *
    359     *     "When MCS is buffer is used for color clear of non-multisampler
    360     *      render target, the following restrictions apply.
    361     *      - Support is limited to tiled render targets.
    362     *      - Support is for non-mip-mapped and non-array surface types only.
    363     *      - Clear is supported only on the full RT; i.e., no partial clear or
    364     *        overlapping clears.
    365     *      - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
    366     *        64bpp and 128bpp.
    367     *      ..."
    368     *
    369     * How about SURFTYPE_3D?
    370     */
    371    if (!info->bind_surface_dp_render ||
    372        tiling == GEN6_TILING_NONE ||
    373        info->level_count > 1 ||
    374        info->array_size > 1)
    375       return false;
    376 
    377    switch (info->block_size) {
    378    case 4:
    379    case 8:
    380    case 16:
    381       return true;
    382    default:
    383       return false;
    384    }
    385 }
    386 
    387 static void
    388 image_get_gen6_alignments(const struct ilo_dev *dev,
    389                           const struct ilo_image_info *info,
    390                           int *align_i, int *align_j)
    391 {
    392    ILO_DEV_ASSERT(dev, 6, 6);
    393 
    394    /*
    395     * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    396     *
    397     *     "surface format           align_i     align_j
    398     *      YUV 4:2:2 formats        4           *see below
    399     *      BC1-5                    4           4
    400     *      FXT1                     8           4
    401     *      all other formats        4           *see below"
    402     *
    403     *     "- align_j = 4 for any depth buffer
    404     *      - align_j = 2 for separate stencil buffer
    405     *      - align_j = 4 for any render target surface is multisampled (4x)
    406     *      - align_j = 4 for any render target surface with Surface Vertical
    407     *        Alignment = VALIGN_4
    408     *      - align_j = 2 for any render target surface with Surface Vertical
    409     *        Alignment = VALIGN_2
    410     *      - align_j = 2 for all other render target surface
    411     *      - align_j = 2 for any sampling engine surface with Surface Vertical
    412     *        Alignment = VALIGN_2
    413     *      - align_j = 4 for any sampling engine surface with Surface Vertical
    414     *        Alignment = VALIGN_4"
    415     *
    416     * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    417     *
    418     *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    419     *      the Surface Format is 96 bits per element (BPE)."
    420     *
    421     * They can be rephrased as
    422     *
    423     *                                  align_i        align_j
    424     *   compressed formats             block width    block height
    425     *   GEN6_FORMAT_R8_UINT            4              2
    426     *   other depth/stencil formats    4              4
    427     *   4x multisampled                4              4
    428     *   bpp 96                         4              2
    429     *   others                         4              2 or 4
    430     */
    431 
    432    *align_i = (info->compressed) ? info->block_width : 4;
    433    if (info->compressed) {
    434       *align_j = info->block_height;
    435    } else if (info->bind_zs) {
    436       *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4;
    437    } else {
    438       *align_j = (info->sample_count > 1 || info->block_size != 12) ? 4 : 2;
    439    }
    440 }
    441 
    442 static void
    443 image_get_gen7_alignments(const struct ilo_dev *dev,
    444                           const struct ilo_image_info *info,
    445                           enum gen_surface_tiling tiling,
    446                           int *align_i, int *align_j)
    447 {
    448    int i, j;
    449 
    450    ILO_DEV_ASSERT(dev, 7, 8);
    451 
    452    /*
    453     * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    454     *
    455     *     "surface defined by      surface format     align_i     align_j
    456     *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
    457     *                              not D16_UNORM      4           4
    458     *      3DSTATE_STENCIL_BUFFER  N/A                8           8
    459     *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
    460     *                              FXT1               8           4
    461     *                              all others         (set by SURFACE_STATE)"
    462     *
    463     * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    464     *
    465     *     "- This field (Surface Vertical Aligment) is intended to be set to
    466     *        VALIGN_4 if the surface was rendered as a depth buffer, for a
    467     *        multisampled (4x) render target, or for a multisampled (8x)
    468     *        render target, since these surfaces support only alignment of 4.
    469     *      - Use of VALIGN_4 for other surfaces is supported, but uses more
    470     *        memory.
    471     *      - This field must be set to VALIGN_4 for all tiled Y Render Target
    472     *        surfaces.
    473     *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    474     *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    475     *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    476     *        must be set to VALIGN_4."
    477     *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    478     *
    479     *     "- This field (Surface Horizontal Aligment) is intended to be set to
    480     *        HALIGN_8 only if the surface was rendered as a depth buffer with
    481     *        Z16 format or a stencil buffer, since these surfaces support only
    482     *        alignment of 8.
    483     *      - Use of HALIGN_8 for other surfaces is supported, but uses more
    484     *        memory.
    485     *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
    486     *      - This field must be set to HALIGN_8 if the Surface Format is
    487     *        FXT1."
    488     *
    489     * They can be rephrased as
    490     *
    491     *                                  align_i        align_j
    492     *  compressed formats              block width    block height
    493     *  GEN6_FORMAT_R16_UNORM           8              4
    494     *  GEN6_FORMAT_R8_UINT             8              8
    495     *  other depth/stencil formats     4              4
    496     *  2x or 4x multisampled           4 or 8         4
    497     *  tiled Y                         4 or 8         4 (if rt)
    498     *  GEN6_FORMAT_R32G32B32_FLOAT     4 or 8         2
    499     *  others                          4 or 8         2 or 4
    500     */
    501    if (info->compressed) {
    502       i = info->block_width;
    503       j = info->block_height;
    504    } else if (info->bind_zs) {
    505       switch (info->format) {
    506       case GEN6_FORMAT_R16_UNORM:
    507          i = 8;
    508          j = 4;
    509          break;
    510       case GEN6_FORMAT_R8_UINT:
    511          i = 8;
    512          j = 8;
    513          break;
    514       default:
    515          i = 4;
    516          j = 4;
    517          break;
    518       }
    519    } else {
    520       const bool valign_4 =
    521          (info->sample_count > 1 || ilo_dev_gen(dev) >= ILO_GEN(8) ||
    522           (tiling == GEN6_TILING_Y && info->bind_surface_dp_render));
    523 
    524       if (ilo_dev_gen(dev) < ILO_GEN(8) && valign_4)
    525          assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
    526 
    527       i = 4;
    528       j = (valign_4) ? 4 : 2;
    529    }
    530 
    531    *align_i = i;
    532    *align_j = j;
    533 }
    534 
    535 static bool
    536 image_init_gen6_hardware_layout(const struct ilo_dev *dev,
    537                                 const struct ilo_image_info *info,
    538                                 struct ilo_image_layout *layout)
    539 {
    540    ILO_DEV_ASSERT(dev, 6, 8);
    541 
    542    if (ilo_dev_gen(dev) >= ILO_GEN(7))
    543       layout->walk = image_get_gen7_walk(dev, info);
    544    else
    545       layout->walk = image_get_gen6_walk(dev, info);
    546 
    547    layout->interleaved_samples =
    548       image_get_gen6_interleaved_samples(dev, info);
    549 
    550    layout->valid_tilings = image_get_gen6_valid_tilings(dev, info);
    551    if (!layout->valid_tilings)
    552       return false;
    553 
    554    layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings);
    555 
    556    if (image_get_gen6_hiz_enable(dev, info))
    557       layout->aux = ILO_IMAGE_AUX_HIZ;
    558    else if (ilo_dev_gen(dev) >= ILO_GEN(7) &&
    559             image_get_gen7_mcs_enable(dev, info, layout->tiling))
    560       layout->aux = ILO_IMAGE_AUX_MCS;
    561    else
    562       layout->aux = ILO_IMAGE_AUX_NONE;
    563 
    564    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
    565       image_get_gen7_alignments(dev, info, layout->tiling,
    566             &layout->align_i, &layout->align_j);
    567    } else {
    568       image_get_gen6_alignments(dev, info,
    569             &layout->align_i, &layout->align_j);
    570    }
    571 
    572    return true;
    573 }
    574 
    575 static bool
    576 image_init_gen6_transfer_layout(const struct ilo_dev *dev,
    577                                 const struct ilo_image_info *info,
    578                                 struct ilo_image_layout *layout)
    579 {
    580    ILO_DEV_ASSERT(dev, 6, 8);
    581 
    582    /* we can define our own layout to save space */
    583    layout->walk = ILO_IMAGE_WALK_LOD;
    584    layout->interleaved_samples = false;
    585    layout->valid_tilings = IMAGE_TILING_NONE;
    586    layout->tiling = GEN6_TILING_NONE;
    587    layout->aux = ILO_IMAGE_AUX_NONE;
    588    layout->align_i = info->block_width;
    589    layout->align_j = info->block_height;
    590 
    591    return true;
    592 }
    593 
    594 static void
    595 image_get_gen6_slice_size(const struct ilo_dev *dev,
    596                           const struct ilo_image_info *info,
    597                           const struct ilo_image_layout *layout,
    598                           uint8_t level,
    599                           int *width, int *height)
    600 {
    601    int w, h;
    602 
    603    ILO_DEV_ASSERT(dev, 6, 8);
    604 
    605    w = u_minify(info->width, level);
    606    h = u_minify(info->height, level);
    607 
    608    /*
    609     * From the Sandy Bridge PRM, volume 1 part 1, page 114:
    610     *
    611     *     "The dimensions of the mip maps are first determined by applying the
    612     *      sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
    613     *      if necessary, they are padded out to compression block boundaries."
    614     */
    615    w = align(w, info->block_width);
    616    h = align(h, info->block_height);
    617 
    618    /*
    619     * From the Sandy Bridge PRM, volume 1 part 1, page 111:
    620     *
    621     *     "If the surface is multisampled (4x), these values must be adjusted
    622     *      as follows before proceeding:
    623     *
    624     *        W_L = ceiling(W_L / 2) * 4
    625     *        H_L = ceiling(H_L / 2) * 4"
    626     *
    627     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
    628     *
    629     *     "If the surface is multisampled and it is a depth or stencil surface
    630     *      or Multisampled Surface StorageFormat in SURFACE_STATE is
    631     *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
    632     *      proceeding:
    633     *
    634     *        #samples  W_L =                    H_L =
    635     *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
    636     *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
    637     *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
    638     *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
    639     *
    640     * For interleaved samples (4x), where pixels
    641     *
    642     *   (x, y  ) (x+1, y  )
    643     *   (x, y+1) (x+1, y+1)
    644     *
    645     * would be is occupied by
    646     *
    647     *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
    648     *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
    649     *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
    650     *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
    651     *
    652     * Thus the need to
    653     *
    654     *   w = align(w, 2) * 2;
    655     *   y = align(y, 2) * 2;
    656     */
    657    if (layout->interleaved_samples) {
    658       switch (info->sample_count) {
    659       case 1:
    660          break;
    661       case 2:
    662          w = align(w, 2) * 2;
    663          break;
    664       case 4:
    665          w = align(w, 2) * 2;
    666          h = align(h, 2) * 2;
    667          break;
    668       case 8:
    669          w = align(w, 2) * 4;
    670          h = align(h, 2) * 2;
    671          break;
    672       case 16:
    673          w = align(w, 2) * 4;
    674          h = align(h, 2) * 4;
    675          break;
    676       default:
    677          assert(!"unsupported sample count");
    678          break;
    679       }
    680    }
    681 
    682    /*
    683     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
    684     *
    685     *     "For separate stencil buffer, the width must be mutiplied by 2 and
    686     *      height divided by 2..."
    687     *
    688     * To make things easier (for transfer), we will just double the stencil
    689     * stride in 3DSTATE_STENCIL_BUFFER.
    690     */
    691    w = align(w, layout->align_i);
    692    h = align(h, layout->align_j);
    693 
    694    *width = w;
    695    *height = h;
    696 }
    697 
    698 static int
    699 image_get_gen6_layer_count(const struct ilo_dev *dev,
    700                            const struct ilo_image_info *info,
    701                            const struct ilo_image_layout *layout)
    702 {
    703    int count = info->array_size;
    704 
    705    ILO_DEV_ASSERT(dev, 6, 8);
    706 
    707    /* samples of the same index are stored in a layer */
    708    if (!layout->interleaved_samples)
    709       count *= info->sample_count;
    710 
    711    return count;
    712 }
    713 
    714 static void
    715 image_get_gen6_walk_layer_heights(const struct ilo_dev *dev,
    716                                   const struct ilo_image_info *info,
    717                                   struct ilo_image_layout *layout)
    718 {
    719    ILO_DEV_ASSERT(dev, 6, 8);
    720 
    721    layout->walk_layer_h0 = layout->lods[0].slice_height;
    722 
    723    if (info->level_count > 1) {
    724       layout->walk_layer_h1 = layout->lods[1].slice_height;
    725    } else {
    726       int dummy;
    727       image_get_gen6_slice_size(dev, info, layout, 1,
    728             &dummy, &layout->walk_layer_h1);
    729    }
    730 
    731    if (image_get_gen6_layer_count(dev, info, layout) == 1) {
    732       layout->walk_layer_height = 0;
    733       return;
    734    }
    735 
    736    /*
    737     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
    738     *
    739     *     "The following equation is used for surface formats other than
    740     *      compressed textures:
    741     *
    742     *        QPitch = (h0 + h1 + 11j)"
    743     *
    744     *     "The equation for compressed textures (BC* and FXT1 surface formats)
    745     *      follows:
    746     *
    747     *        QPitch = (h0 + h1 + 11j) / 4"
    748     *
    749     *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
    750     *      value calculated in the equation above, for every other odd Surface
    751     *      Height starting from 1 i.e. 1,5,9,13"
    752     *
    753     * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
    754     *
    755     *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
    756     *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
    757     *
    758     *        QPitch = (h0 + h1 + 12j)
    759     *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
    760     *
    761     *      (There are many typos or missing words here...)"
    762     *
    763     * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
    764     * the base address.  The PRM divides QPitch by 4 for compressed formats
    765     * because the block height for those formats are 4, and it wants QPitch to
    766     * mean the number of memory rows, as opposed to texel rows, between
    767     * slices.  Since we use texel rows everywhere, we do not need to divide
    768     * QPitch by 4.
    769     */
    770    layout->walk_layer_height = layout->walk_layer_h0 + layout->walk_layer_h1 +
    771       ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
    772 
    773    if (ilo_dev_gen(dev) == ILO_GEN(6) && info->sample_count > 1 &&
    774        info->height % 4 == 1)
    775       layout->walk_layer_height += 4;
    776 }
    777 
    778 static void
    779 image_get_gen6_monolithic_size(const struct ilo_dev *dev,
    780                                const struct ilo_image_info *info,
    781                                struct ilo_image_layout *layout,
    782                                int max_x, int max_y)
    783 {
    784    int align_w = 1, align_h = 1, pad_h = 0;
    785 
    786    ILO_DEV_ASSERT(dev, 6, 8);
    787 
    788    /*
    789     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    790     *
    791     *     "To determine the necessary padding on the bottom and right side of
    792     *      the surface, refer to the table in Section 7.18.3.4 for the i and j
    793     *      parameters for the surface format in use. The surface must then be
    794     *      extended to the next multiple of the alignment unit size in each
    795     *      dimension, and all texels contained in this extended surface must
    796     *      have valid GTT entries."
    797     *
    798     *     "For cube surfaces, an additional two rows of padding are required
    799     *      at the bottom of the surface. This must be ensured regardless of
    800     *      whether the surface is stored tiled or linear.  This is due to the
    801     *      potential rotation of cache line orientation from memory to cache."
    802     *
    803     *     "For compressed textures (BC* and FXT1 surface formats), padding at
    804     *      the bottom of the surface is to an even compressed row, which is
    805     *      equal to a multiple of 8 uncompressed texel rows. Thus, for padding
    806     *      purposes, these surfaces behave as if j = 8 only for surface
    807     *      padding purposes. The value of 4 for j still applies for mip level
    808     *      alignment and QPitch calculation."
    809     */
    810    if (info->bind_surface_sampler) {
    811       align_w = MAX2(align_w, layout->align_i);
    812       align_h = MAX2(align_h, layout->align_j);
    813 
    814       if (info->type == GEN6_SURFTYPE_CUBE)
    815          pad_h += 2;
    816 
    817       if (info->compressed)
    818          align_h = MAX2(align_h, layout->align_j * 2);
    819    }
    820 
    821    /*
    822     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    823     *
    824     *     "If the surface contains an odd number of rows of data, a final row
    825     *      below the surface must be allocated."
    826     */
    827    if (info->bind_surface_dp_render)
    828       align_h = MAX2(align_h, 2);
    829 
    830    /*
    831     * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  Pad to allow HiZ
    832     * for unaligned non-mipmapped and non-array images.
    833     */
    834    if (layout->aux == ILO_IMAGE_AUX_HIZ &&
    835        info->level_count == 1 && info->array_size == 1 && info->depth == 1) {
    836       align_w = MAX2(align_w, 8);
    837       align_h = MAX2(align_h, 4);
    838    }
    839 
    840    layout->monolithic_width = align(max_x, align_w);
    841    layout->monolithic_height = align(max_y + pad_h, align_h);
    842 }
    843 
    844 static void
    845 image_get_gen6_lods(const struct ilo_dev *dev,
    846                     const struct ilo_image_info *info,
    847                     struct ilo_image_layout *layout)
    848 {
    849    const int layer_count = image_get_gen6_layer_count(dev, info, layout);
    850    int cur_x, cur_y, max_x, max_y;
    851    uint8_t lv;
    852 
    853    ILO_DEV_ASSERT(dev, 6, 8);
    854 
    855    cur_x = 0;
    856    cur_y = 0;
    857    max_x = 0;
    858    max_y = 0;
    859    for (lv = 0; lv < info->level_count; lv++) {
    860       int slice_w, slice_h, lod_w, lod_h;
    861 
    862       image_get_gen6_slice_size(dev, info, layout, lv, &slice_w, &slice_h);
    863 
    864       layout->lods[lv].x = cur_x;
    865       layout->lods[lv].y = cur_y;
    866       layout->lods[lv].slice_width = slice_w;
    867       layout->lods[lv].slice_height = slice_h;
    868 
    869       switch (layout->walk) {
    870       case ILO_IMAGE_WALK_LAYER:
    871          lod_w = slice_w;
    872          lod_h = slice_h;
    873 
    874          /* MIPLAYOUT_BELOW */
    875          if (lv == 1)
    876             cur_x += lod_w;
    877          else
    878             cur_y += lod_h;
    879          break;
    880       case ILO_IMAGE_WALK_LOD:
    881          lod_w = slice_w;
    882          lod_h = slice_h * layer_count;
    883 
    884          if (lv == 1)
    885             cur_x += lod_w;
    886          else
    887             cur_y += lod_h;
    888 
    889          /* every LOD begins at tile boundaries */
    890          if (info->level_count > 1) {
    891             assert(info->format == GEN6_FORMAT_R8_UINT);
    892             cur_x = align(cur_x, 64);
    893             cur_y = align(cur_y, 64);
    894          }
    895          break;
    896       case ILO_IMAGE_WALK_3D:
    897          {
    898             const int slice_count = u_minify(info->depth, lv);
    899             const int slice_count_per_row = 1 << lv;
    900             const int row_count =
    901                (slice_count + slice_count_per_row - 1) / slice_count_per_row;
    902 
    903             lod_w = slice_w * slice_count_per_row;
    904             lod_h = slice_h * row_count;
    905          }
    906 
    907          cur_y += lod_h;
    908          break;
    909       default:
    910          assert(!"unknown walk type");
    911          lod_w = 0;
    912          lod_h = 0;
    913          break;
    914       }
    915 
    916       if (max_x < layout->lods[lv].x + lod_w)
    917          max_x = layout->lods[lv].x + lod_w;
    918       if (max_y < layout->lods[lv].y + lod_h)
    919          max_y = layout->lods[lv].y + lod_h;
    920    }
    921 
    922    if (layout->walk == ILO_IMAGE_WALK_LAYER) {
    923       image_get_gen6_walk_layer_heights(dev, info, layout);
    924       if (layer_count > 1)
    925          max_y += layout->walk_layer_height * (layer_count - 1);
    926    } else {
    927       layout->walk_layer_h0 = 0;
    928       layout->walk_layer_h1 = 0;
    929       layout->walk_layer_height = 0;
    930    }
    931 
    932    image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y);
    933 }
    934 
    935 static bool
    936 image_bind_gpu(const struct ilo_image_info *info)
    937 {
    938    return (info->bind_surface_sampler ||
    939            info->bind_surface_dp_render ||
    940            info->bind_surface_dp_typed ||
    941            info->bind_zs ||
    942            info->bind_scanout ||
    943            info->bind_cursor);
    944 }
    945 
    946 static bool
    947 image_validate_gen6(const struct ilo_dev *dev,
    948                     const struct ilo_image_info *info)
    949 {
    950    ILO_DEV_ASSERT(dev, 6, 8);
    951 
    952    /*
    953     * From the Ivy Bridge PRM, volume 2 part 1, page 314:
    954     *
    955     *     "The separate stencil buffer is always enabled, thus the field in
    956     *      3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil
    957     *      buffer has been removed Surface formats with interleaved depth and
    958     *      stencil are no longer supported"
    959     */
    960    if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->bind_zs)
    961       assert(!info->interleaved_stencil);
    962 
    963    return true;
    964 }
    965 
    966 static bool
    967 image_get_gen6_layout(const struct ilo_dev *dev,
    968                       const struct ilo_image_info *info,
    969                       struct ilo_image_layout *layout)
    970 {
    971    ILO_DEV_ASSERT(dev, 6, 8);
    972 
    973    if (!image_validate_gen6(dev, info))
    974       return false;
    975 
    976    if (image_bind_gpu(info) || info->level_count > 1) {
    977       if (!image_init_gen6_hardware_layout(dev, info, layout))
    978          return false;
    979    } else {
    980       if (!image_init_gen6_transfer_layout(dev, info, layout))
    981          return false;
    982    }
    983 
    984    /*
    985     * the fact that align i and j are multiples of block width and height
    986     * respectively is what makes the size of the bo a multiple of the block
    987     * size, slices start at block boundaries, and many of the computations
    988     * work.
    989     */
    990    assert(layout->align_i % info->block_width == 0);
    991    assert(layout->align_j % info->block_height == 0);
    992 
    993    /* make sure align() works */
    994    assert(util_is_power_of_two(layout->align_i) &&
    995           util_is_power_of_two(layout->align_j));
    996    assert(util_is_power_of_two(info->block_width) &&
    997           util_is_power_of_two(info->block_height));
    998 
    999    image_get_gen6_lods(dev, info, layout);
   1000 
   1001    assert(layout->walk_layer_height % info->block_height == 0);
   1002    assert(layout->monolithic_width % info->block_width == 0);
   1003    assert(layout->monolithic_height % info->block_height == 0);
   1004 
   1005    return true;
   1006 }
   1007 
   1008 static bool
   1009 image_set_gen6_bo_size(struct ilo_image *img,
   1010                        const struct ilo_dev *dev,
   1011                        const struct ilo_image_info *info,
   1012                        const struct ilo_image_layout *layout)
   1013 {
   1014    int stride, height;
   1015    int align_w, align_h;
   1016 
   1017    ILO_DEV_ASSERT(dev, 6, 8);
   1018 
   1019    stride = (layout->monolithic_width / info->block_width) * info->block_size;
   1020    height = layout->monolithic_height / info->block_height;
   1021 
   1022    /*
   1023     * From the Haswell PRM, volume 5, page 163:
   1024     *
   1025     *     "For linear surfaces, additional padding of 64 bytes is required
   1026     *      at the bottom of the surface. This is in addition to the padding
   1027     *      required above."
   1028     */
   1029    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler &&
   1030        layout->tiling == GEN6_TILING_NONE)
   1031       height += (64 + stride - 1) / stride;
   1032 
   1033    /*
   1034     * From the Sandy Bridge PRM, volume 4 part 1, page 81:
   1035     *
   1036     *     "- For linear render target surfaces, the pitch must be a multiple
   1037     *        of the element size for non-YUV surface formats.  Pitch must be a
   1038     *        multiple of 2 * element size for YUV surface formats.
   1039     *
   1040     *      - For other linear surfaces, the pitch can be any multiple of
   1041     *        bytes.
   1042     *      - For tiled surfaces, the pitch must be a multiple of the tile
   1043     *        width."
   1044     *
   1045     * Different requirements may exist when the image is used in different
   1046     * places, but our alignments here should be good enough that we do not
   1047     * need to check info->bind_x.
   1048     */
   1049    switch (layout->tiling) {
   1050    case GEN6_TILING_X:
   1051       align_w = 512;
   1052       align_h = 8;
   1053       break;
   1054    case GEN6_TILING_Y:
   1055       align_w = 128;
   1056       align_h = 32;
   1057       break;
   1058    case GEN8_TILING_W:
   1059       /*
   1060        * From the Sandy Bridge PRM, volume 1 part 2, page 22:
   1061        *
   1062        *     "A 4KB tile is subdivided into 8-high by 8-wide array of
   1063        *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
   1064        *      bytes."
   1065        */
   1066       align_w = 64;
   1067       align_h = 64;
   1068       break;
   1069    default:
   1070       assert(layout->tiling == GEN6_TILING_NONE);
   1071       /* some good enough values */
   1072       align_w = 64;
   1073       align_h = 2;
   1074       break;
   1075    }
   1076 
   1077    if (info->force_bo_stride) {
   1078       if (info->force_bo_stride % align_w || info->force_bo_stride < stride)
   1079          return false;
   1080 
   1081       img->bo_stride = info->force_bo_stride;
   1082    } else {
   1083       img->bo_stride = align(stride, align_w);
   1084    }
   1085 
   1086    img->bo_height = align(height, align_h);
   1087 
   1088    return true;
   1089 }
   1090 
   1091 static bool
   1092 image_set_gen6_hiz(struct ilo_image *img,
   1093                    const struct ilo_dev *dev,
   1094                    const struct ilo_image_info *info,
   1095                    const struct ilo_image_layout *layout)
   1096 {
   1097    const int hz_align_j = 8;
   1098    enum ilo_image_walk_type hz_walk;
   1099    int hz_width, hz_height;
   1100    int hz_clear_w, hz_clear_h;
   1101    uint8_t lv;
   1102 
   1103    ILO_DEV_ASSERT(dev, 6, 8);
   1104 
   1105    assert(layout->aux == ILO_IMAGE_AUX_HIZ);
   1106 
   1107    assert(layout->walk == ILO_IMAGE_WALK_LAYER ||
   1108           layout->walk == ILO_IMAGE_WALK_3D);
   1109 
   1110    /*
   1111     * From the Sandy Bridge PRM, volume 2 part 1, page 312:
   1112     *
   1113     *     "The hierarchical depth buffer does not support the LOD field, it is
   1114     *      assumed by hardware to be zero. A separate hierarachical depth
   1115     *      buffer is required for each LOD used, and the corresponding
   1116     *      buffer's state delivered to hardware each time a new depth buffer
   1117     *      state with modified LOD is delivered."
   1118     *
   1119     * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
   1120     */
   1121    if (ilo_dev_gen(dev) >= ILO_GEN(7))
   1122       hz_walk = layout->walk;
   1123    else
   1124       hz_walk = ILO_IMAGE_WALK_LOD;
   1125 
   1126    /*
   1127     * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
   1128     * PRM, volume 2 part 1, page 312-313.
   1129     *
   1130     * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
   1131     * memory row.
   1132     */
   1133    switch (hz_walk) {
   1134    case ILO_IMAGE_WALK_LAYER:
   1135       {
   1136          const int h0 = align(layout->walk_layer_h0, hz_align_j);
   1137          const int h1 = align(layout->walk_layer_h1, hz_align_j);
   1138          const int htail =
   1139             ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
   1140          const int hz_qpitch = h0 + h1 + htail;
   1141 
   1142          hz_width = align(layout->lods[0].slice_width, 16);
   1143 
   1144          hz_height = hz_qpitch * info->array_size / 2;
   1145          if (ilo_dev_gen(dev) >= ILO_GEN(7))
   1146             hz_height = align(hz_height, 8);
   1147 
   1148          img->aux.walk_layer_height = hz_qpitch;
   1149       }
   1150       break;
   1151    case ILO_IMAGE_WALK_LOD:
   1152       {
   1153          int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
   1154          int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
   1155          int cur_tx, cur_ty;
   1156 
   1157          /* figure out the tile offsets of LODs */
   1158          hz_width = 0;
   1159          hz_height = 0;
   1160          cur_tx = 0;
   1161          cur_ty = 0;
   1162          for (lv = 0; lv < info->level_count; lv++) {
   1163             int tw, th;
   1164 
   1165             lod_tx[lv] = cur_tx;
   1166             lod_ty[lv] = cur_ty;
   1167 
   1168             tw = align(layout->lods[lv].slice_width, 16);
   1169             th = align(layout->lods[lv].slice_height, hz_align_j) *
   1170                info->array_size / 2;
   1171             /* convert to Y-tiles */
   1172             tw = (tw + 127) / 128;
   1173             th = (th + 31) / 32;
   1174 
   1175             if (hz_width < cur_tx + tw)
   1176                hz_width = cur_tx + tw;
   1177             if (hz_height < cur_ty + th)
   1178                hz_height = cur_ty + th;
   1179 
   1180             if (lv == 1)
   1181                cur_tx += tw;
   1182             else
   1183                cur_ty += th;
   1184          }
   1185 
   1186          /* convert tile offsets to memory offsets */
   1187          for (lv = 0; lv < info->level_count; lv++) {
   1188             img->aux.walk_lod_offsets[lv] =
   1189                (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
   1190          }
   1191 
   1192          hz_width *= 128;
   1193          hz_height *= 32;
   1194       }
   1195       break;
   1196    case ILO_IMAGE_WALK_3D:
   1197       hz_width = align(layout->lods[0].slice_width, 16);
   1198 
   1199       hz_height = 0;
   1200       for (lv = 0; lv < info->level_count; lv++) {
   1201          const int h = align(layout->lods[lv].slice_height, hz_align_j);
   1202          /* according to the formula, slices are packed together vertically */
   1203          hz_height += h * u_minify(info->depth, lv);
   1204       }
   1205       hz_height /= 2;
   1206       break;
   1207    default:
   1208       assert(!"unknown HiZ walk");
   1209       hz_width = 0;
   1210       hz_height = 0;
   1211       break;
   1212    }
   1213 
   1214    /*
   1215     * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
   1216     * Experiments on Haswell show that aligning the RECTLIST primitive and
   1217     * 3DSTATE_DRAWING_RECTANGLE alone are not enough.  The LOD sizes must be
   1218     * aligned.
   1219     */
   1220    hz_clear_w = 8;
   1221    hz_clear_h = 4;
   1222    switch (info->sample_count) {
   1223    case 1:
   1224    default:
   1225       break;
   1226    case 2:
   1227       hz_clear_w /= 2;
   1228       break;
   1229    case 4:
   1230       hz_clear_w /= 2;
   1231       hz_clear_h /= 2;
   1232       break;
   1233    case 8:
   1234       hz_clear_w /= 4;
   1235       hz_clear_h /= 2;
   1236       break;
   1237    case 16:
   1238       hz_clear_w /= 4;
   1239       hz_clear_h /= 4;
   1240       break;
   1241    }
   1242 
   1243    for (lv = 0; lv < info->level_count; lv++) {
   1244       if (u_minify(info->width, lv) % hz_clear_w ||
   1245           u_minify(info->height, lv) % hz_clear_h)
   1246          break;
   1247       img->aux.enables |= 1 << lv;
   1248    }
   1249 
   1250    /* we padded to allow this in image_get_gen6_monolithic_size() */
   1251    if (info->level_count == 1 && info->array_size == 1 && info->depth == 1)
   1252       img->aux.enables |= 0x1;
   1253 
   1254    /* align to Y-tile */
   1255    img->aux.bo_stride = align(hz_width, 128);
   1256    img->aux.bo_height = align(hz_height, 32);
   1257 
   1258    return true;
   1259 }
   1260 
   1261 static bool
   1262 image_set_gen7_mcs(struct ilo_image *img,
   1263                    const struct ilo_dev *dev,
   1264                    const struct ilo_image_info *info,
   1265                    const struct ilo_image_layout *layout)
   1266 {
   1267    int mcs_width, mcs_height, mcs_cpp;
   1268    int downscale_x, downscale_y;
   1269 
   1270    ILO_DEV_ASSERT(dev, 7, 8);
   1271 
   1272    assert(layout->aux == ILO_IMAGE_AUX_MCS);
   1273 
   1274    if (info->sample_count > 1) {
   1275       /*
   1276        * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
   1277        * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA.  The
   1278        * need of scale down could be that the clear rectangle is used to clear
   1279        * the MCS instead of the RT.
   1280        *
   1281        * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT.  The
   1282        * 2x2 factor could come from that the hardware writes 128 bits (an
   1283        * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
   1284        * the RT.  For 4X MSAA, we need 8 bits in MCS for every pixel in the
   1285        * RT.  Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
   1286        * pixel block in the RT.
   1287        */
   1288       switch (info->sample_count) {
   1289       case 2:
   1290       case 4:
   1291          downscale_x = 8;
   1292          downscale_y = 2;
   1293          mcs_cpp = 1;
   1294          break;
   1295       case 8:
   1296          downscale_x = 2;
   1297          downscale_y = 2;
   1298          mcs_cpp = 4;
   1299          break;
   1300       case 16:
   1301          downscale_x = 2;
   1302          downscale_y = 1;
   1303          mcs_cpp = 8;
   1304          break;
   1305       default:
   1306          assert(!"unsupported sample count");
   1307          return false;
   1308          break;
   1309       }
   1310 
   1311       /*
   1312        * It also appears that the 2x2 subspans generated by the scaled-down
   1313        * clear rectangle cannot be masked.  The scale-down clear rectangle
   1314        * thus must be aligned to 2x2, and we need to pad.
   1315        */
   1316       mcs_width = align(info->width, downscale_x * 2);
   1317       mcs_height = align(info->height, downscale_y * 2);
   1318    } else {
   1319       /*
   1320        * From the Ivy Bridge PRM, volume 2 part 1, page 327:
   1321        *
   1322        *     "              Pixels  Lines
   1323        *      TiledY RT CL
   1324        *          bpp
   1325        *          32          8        4
   1326        *          64          4        4
   1327        *          128         2        4
   1328        *
   1329        *      TiledX RT CL
   1330        *          bpp
   1331        *          32          16       2
   1332        *          64          8        2
   1333        *          128         4        2"
   1334        *
   1335        * This table and the two following tables define the RT alignments, the
   1336        * clear rectangle alignments, and the clear rectangle scale factors.
   1337        * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
   1338        * that the clear rectangle alignments are 16x32 blocks, and the clear
   1339        * rectangle scale factors are 8x16 blocks.
   1340        *
   1341        * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
   1342        * RT.  Similar to the MSAA cases, we can argue that an OWord maps to
   1343        * 8x16 blocks.
   1344        *
   1345        * One problem with this reasoning is that a Y-tile in MCS has 8x32
   1346        * OWords and maps to 64x512 128-byte blocks.  This differs from i965,
   1347        * which says that a Y-tile maps to 128x256 blocks (\see
   1348        * intel_get_non_msrt_mcs_alignment).  It does not really change
   1349        * anything except for the size of the allocated MCS.  Let's see if we
   1350        * hit out-of-bound access.
   1351        */
   1352       switch (layout->tiling) {
   1353       case GEN6_TILING_X:
   1354          downscale_x = 64 / info->block_size;
   1355          downscale_y = 2;
   1356          break;
   1357       case GEN6_TILING_Y:
   1358          downscale_x = 32 / info->block_size;
   1359          downscale_y = 4;
   1360          break;
   1361       default:
   1362          assert(!"unsupported tiling mode");
   1363          return false;
   1364          break;
   1365       }
   1366 
   1367       downscale_x *= 8;
   1368       downscale_y *= 16;
   1369 
   1370       /*
   1371        * From the Haswell PRM, volume 7, page 652:
   1372        *
   1373        *     "Clear rectangle must be aligned to two times the number of
   1374        *      pixels in the table shown below due to 16X16 hashing across the
   1375        *      slice."
   1376        *
   1377        * The scaled-down clear rectangle must be aligned to 4x4 instead of
   1378        * 2x2, and we need to pad.
   1379        */
   1380       mcs_width = align(info->width, downscale_x * 4) / downscale_x;
   1381       mcs_height = align(info->height, downscale_y * 4) / downscale_y;
   1382       mcs_cpp = 16; /* an OWord */
   1383    }
   1384 
   1385    img->aux.enables = (1 << info->level_count) - 1;
   1386    /* align to Y-tile */
   1387    img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
   1388    img->aux.bo_height = align(mcs_height, 32);
   1389 
   1390    return true;
   1391 }
   1392 
   1393 bool
   1394 ilo_image_init(struct ilo_image *img,
   1395                const struct ilo_dev *dev,
   1396                const struct ilo_image_info *info)
   1397 {
   1398    struct ilo_image_layout layout;
   1399 
   1400    assert(ilo_is_zeroed(img, sizeof(*img)));
   1401 
   1402    memset(&layout, 0, sizeof(layout));
   1403    layout.lods = img->lods;
   1404 
   1405    if (!image_get_gen6_layout(dev, info, &layout))
   1406       return false;
   1407 
   1408    img->type = info->type;
   1409 
   1410    img->format = info->format;
   1411    img->block_width = info->block_width;
   1412    img->block_height = info->block_height;
   1413    img->block_size = info->block_size;
   1414 
   1415    img->width0 = info->width;
   1416    img->height0 = info->height;
   1417    img->depth0 = info->depth;
   1418    img->array_size = info->array_size;
   1419    img->level_count = info->level_count;
   1420    img->sample_count = info->sample_count;
   1421 
   1422    img->walk = layout.walk;
   1423    img->interleaved_samples = layout.interleaved_samples;
   1424 
   1425    img->tiling = layout.tiling;
   1426 
   1427    img->aux.type = layout.aux;
   1428 
   1429    img->align_i = layout.align_i;
   1430    img->align_j = layout.align_j;
   1431 
   1432    img->walk_layer_height = layout.walk_layer_height;
   1433 
   1434    if (!image_set_gen6_bo_size(img, dev, info, &layout))
   1435       return false;
   1436 
   1437    img->scanout = info->bind_scanout;
   1438 
   1439    switch (layout.aux) {
   1440    case ILO_IMAGE_AUX_HIZ:
   1441       image_set_gen6_hiz(img, dev, info, &layout);
   1442       break;
   1443    case ILO_IMAGE_AUX_MCS:
   1444       image_set_gen7_mcs(img, dev, info, &layout);
   1445       break;
   1446    default:
   1447       break;
   1448    }
   1449 
   1450    return true;
   1451 }
   1452