Home | History | Annotate | Download | only in gfx9
      1 /*
      2  * Copyright  2017 Advanced Micro Devices, Inc.
      3  * All Rights Reserved.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining
      6  * a copy of this software and associated documentation files (the
      7  * "Software"), to deal in the Software without restriction, including
      8  * without limitation the rights to use, copy, modify, merge, publish,
      9  * distribute, sub license, and/or sell copies of the Software, and to
     10  * permit persons to whom the Software is furnished to do so, subject to
     11  * the following conditions:
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
     15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
     17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * The above copyright notice and this permission notice (including the
     23  * next paragraph) shall be included in all copies or substantial portions
     24  * of the Software.
     25  */
     26 
     27 /**
     28 ************************************************************************************************************************
     29 * @file  gfx9addrlib.cpp
     30 * @brief Contains the implementation for the Gfx9Lib class.
     31 ************************************************************************************************************************
     32 */
     33 
     34 #include "gfx9addrlib.h"
     35 
     36 #include "gfx9_gb_reg.h"
     37 
     38 #include "amdgpu_asic_addr.h"
     39 
     40 ////////////////////////////////////////////////////////////////////////////////////////////////////
     41 ////////////////////////////////////////////////////////////////////////////////////////////////////
     42 
     43 namespace Addr
     44 {
     45 
     46 /**
     47 ************************************************************************************************************************
     48 *   Gfx9HwlInit
     49 *
     50 *   @brief
     51 *       Creates an Gfx9Lib object.
     52 *
     53 *   @return
     54 *       Returns an Gfx9Lib object pointer.
     55 ************************************************************************************************************************
     56 */
     57 Addr::Lib* Gfx9HwlInit(const Client* pClient)
     58 {
     59     return V2::Gfx9Lib::CreateObj(pClient);
     60 }
     61 
     62 namespace V2
     63 {
     64 
     65 ////////////////////////////////////////////////////////////////////////////////////////////////////
     66 //                               Static Const Member
     67 ////////////////////////////////////////////////////////////////////////////////////////////////////
     68 
// Static per-swizzle-mode flag table with ADDR_SW_MAX_TYPE rows. Each row is a set of 0/1 flags whose column
// order is given by the header row below (Linear, 256B, 4KB, 64KB, Var, Z, Std, Disp, Rot, XOR, T, RtOpt); the
// trailing comment on each row names the swizzle mode the row is written for. The constructor copies this table
// into m_swizzleModeTable.
// NOTE(review): rows are positional, so their order presumably has to match the ADDR_SW_* enum order - confirm
// against the enum declaration before inserting or reordering rows.
const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt
    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
    {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
    {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_D
    {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_256B_R

    {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
    {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
    {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_D
    {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_R

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_D
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_R

    {0,    0,    0,    0,    1,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_Z
    {0,    0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_S
    {0,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_VAR_D
    {0,    0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_R

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_Z_T
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_S_T
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0}, // ADDR_SW_64KB_D_T
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0}, // ADDR_SW_64KB_R_T

    {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_Z_X
    {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_S_X
    {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_4KB_D_X
    {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_4KB_R_X

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_Z_X
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_S_X
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_64KB_D_X
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_R_X

    {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_Z_X
    {0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_S_X
    {0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_VAR_D_X
    {0,    0,    0,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_VAR_R_X
    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
};
    112 
// Sixteen-entry lookup table whose values decrease from 2048 down to 0.
// NOTE(review): presumably the offset, in 256-byte units, of each mip level placed inside a mip tail - confirm
// against the code that indexes this table (not visible in this part of the file).
const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
                                              8, 6, 5, 4, 3, 2, 1, 0};

// Five {w, h, d} entries whose element counts (w * h * d) are 256, 128, 64, 32 and 16, i.e. entry i covers
// exactly 256 bytes when each element is (1 << i) bytes.
// NOTE(review): presumably the 256B block dimensions for 3D "S" swizzle, indexed by log2(bytes per element) - confirm.
const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};

// Same layout as Block256_3dS (element counts 256, 128, 64, 32, 16) with a different w/h/d split.
// NOTE(review): presumably the 256B block dimensions for 3D "Z" swizzle, indexed by log2(bytes per element) - confirm.
const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
    119 
    120 /**
    121 ************************************************************************************************************************
    122 *   Gfx9Lib::Gfx9Lib
    123 *
    124 *   @brief
    125 *       Constructor
    126 *
    127 ************************************************************************************************************************
    128 */
    129 Gfx9Lib::Gfx9Lib(const Client* pClient)
    130     :
    131     Lib(pClient),
    132     m_numEquations(0)
    133 {
    134     m_class = AI_ADDRLIB;
    135     memset(&m_settings, 0, sizeof(m_settings));
    136     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
    137 }
    138 
    139 /**
    140 ************************************************************************************************************************
    141 *   Gfx9Lib::~Gfx9Lib
    142 *
    143 *   @brief
    144 *       Destructor
    145 ************************************************************************************************************************
    146 */
    147 Gfx9Lib::~Gfx9Lib()
    148 {
    149 }
    150 
    151 /**
    152 ************************************************************************************************************************
    153 *   Gfx9Lib::HwlComputeHtileInfo
    154 *
    155 *   @brief
    156 *       Interface function stub of AddrComputeHtilenfo
    157 *
    158 *   @return
    159 *       ADDR_E_RETURNCODE
    160 ************************************************************************************************************************
    161 */
    162 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
    163     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
    164     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
    165     ) const
    166 {
    167     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
    168                                                        pIn->swizzleMode);
    169 
    170     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
    171 
    172     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
    173 
    174     if ((numPipeTotal == 1) && (numRbTotal == 1))
    175     {
    176         numCompressBlkPerMetaBlkLog2 = 10;
    177     }
    178     else
    179     {
    180         if (m_settings.applyAliasFix)
    181         {
    182             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
    183         }
    184         else
    185         {
    186             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
    187         }
    188     }
    189 
    190     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
    191 
    192     Dim3d metaBlkDim = {8, 8, 1};
    193     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
    194     UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
    195     UINT_32 heightAmp = totalAmpBits - widthAmp;
    196     metaBlkDim.w <<= widthAmp;
    197     metaBlkDim.h <<= heightAmp;
    198 
    199 #if DEBUG
    200     Dim3d metaBlkDimDbg = {8, 8, 1};
    201     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
    202     {
    203         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
    204             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
    205         {
    206             metaBlkDimDbg.h <<= 1;
    207         }
    208         else
    209         {
    210             metaBlkDimDbg.w <<= 1;
    211         }
    212     }
    213     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
    214 #endif
    215 
    216     UINT_32 numMetaBlkX;
    217     UINT_32 numMetaBlkY;
    218     UINT_32 numMetaBlkZ;
    219 
    220     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
    221                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
    222                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
    223 
    224     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
    225 
    226     if (m_settings.htileAlignFix)
    227     {
    228         sizeAlign <<= 1;
    229     }
    230 
    231     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
    232     pOut->height     = numMetaBlkY * metaBlkDim.h;
    233     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
    234 
    235     pOut->metaBlkWidth = metaBlkDim.w;
    236     pOut->metaBlkHeight = metaBlkDim.h;
    237     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
    238 
    239     pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
    240 
    241     if (m_settings.metaBaseAlignFix)
    242     {
    243         pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
    244     }
    245 
    246     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
    247     {
    248         UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
    249 
    250         if (additionalAlign > sizeAlign)
    251         {
    252             sizeAlign = additionalAlign;
    253         }
    254     }
    255 
    256     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
    257 
    258     return ADDR_OK;
    259 }
    260 
    261 /**
    262 ************************************************************************************************************************
    263 *   Gfx9Lib::HwlComputeCmaskInfo
    264 *
    265 *   @brief
    266 *       Interface function stub of AddrComputeCmaskInfo
    267 *
    268 *   @return
    269 *       ADDR_E_RETURNCODE
    270 ************************************************************************************************************************
    271 */
    272 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
    273     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
    274     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
    275     ) const
    276 {
    277 // TODO: Clarify with AddrLib team
    278 //     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
    279 
    280     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
    281                                                        pIn->swizzleMode);
    282 
    283     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
    284 
    285     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
    286 
    287     if ((numPipeTotal == 1) && (numRbTotal == 1))
    288     {
    289         numCompressBlkPerMetaBlkLog2 = 13;
    290     }
    291     else
    292     {
    293         if (m_settings.applyAliasFix)
    294         {
    295             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
    296         }
    297         else
    298         {
    299             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
    300         }
    301 
    302         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
    303     }
    304 
    305     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
    306 
    307     Dim2d metaBlkDim = {8, 8};
    308     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
    309     UINT_32 heightAmp = totalAmpBits >> 1;
    310     UINT_32 widthAmp = totalAmpBits - heightAmp;
    311     metaBlkDim.w <<= widthAmp;
    312     metaBlkDim.h <<= heightAmp;
    313 
    314 #if DEBUG
    315     Dim2d metaBlkDimDbg = {8, 8};
    316     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
    317     {
    318         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
    319         {
    320             metaBlkDimDbg.h <<= 1;
    321         }
    322         else
    323         {
    324             metaBlkDimDbg.w <<= 1;
    325         }
    326     }
    327     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
    328 #endif
    329 
    330     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
    331     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
    332     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
    333 
    334     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
    335 
    336     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
    337     pOut->height     = numMetaBlkY * metaBlkDim.h;
    338     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
    339     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
    340     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
    341 
    342     if (m_settings.metaBaseAlignFix)
    343     {
    344         pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
    345     }
    346 
    347     pOut->metaBlkWidth = metaBlkDim.w;
    348     pOut->metaBlkHeight = metaBlkDim.h;
    349 
    350     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
    351 
    352     return ADDR_OK;
    353 }
    354 
/**
************************************************************************************************************************
*   Gfx9Lib::GetMetaMipInfo
*
*   @brief
*       Get meta mip info. Computes how many meta blocks are needed along X, Y and Z for the whole mip chain
*       and, when pInfo is not NULL, fills one ADDR2_META_MIP_INFO entry per mip level: either the start
*       coordinate and meta-block-aligned size of the level, or mip tail info via GetMetaMiptailInfo once the
*       levels fit into the mip tail.
*
*   @return
*       N/A (results are written through pInfo, pNumMetaBlkX, pNumMetaBlkY and pNumMetaBlkZ)
************************************************************************************************************************
*/
VOID Gfx9Lib::GetMetaMipInfo(
    UINT_32 numMipLevels,           ///< [in]  number of mip levels
    Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
    BOOL_32 dataThick,              ///< [in]  data surface is thick
    ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
    UINT_32 mip0Width,              ///< [in]  mip0 width
    UINT_32 mip0Height,             ///< [in]  mip0 height
    UINT_32 mip0Depth,              ///< [in]  mip0 depth
    UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
    UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
    UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
    const
{
    // Meta blocks needed for mip 0 in each dimension (ceiling division).
    UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
    UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
    UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
    // A level is "in the tail" when it fits within a full meta block width, half a meta block height and
    // (thick surfaces only) a full meta block depth.
    UINT_32 tailWidth   = pMetaBlkDim->w;
    UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
    UINT_32 tailDepth   = pMetaBlkDim->d;
    BOOL_32 inTail      = FALSE;
    // Stays ADDR_MAJOR_MAX_TYPE when there is a single mip level, which makes the switches below no-ops.
    AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;

    if (numMipLevels > 1)
    {
        // Pick the major axis from the mip 0 meta block counts; Z is only considered for thick surfaces.
        if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
        {
            // Z major
            major = ADDR_MAJOR_Z;
        }
        else if (numMetaBlkX >= numMetaBlkY)
        {
            // X major
            major = ADDR_MAJOR_X;
        }
        else
        {
            // Y major
            major = ADDR_MAJOR_Y;
        }

        inTail = ((mip0Width <= tailWidth) &&
                  (mip0Height <= tailHeight) &&
                  ((dataThick == FALSE) || (mip0Depth <= tailDepth)));

        if (inTail == FALSE)
        {
            // Mip 0 does not fit in the tail, so one meta block count is enlarged below.
            // NOTE(review): presumably this makes room for the smaller mip levels placed next to mip 0 - confirm.
            UINT_32 orderLimit;
            UINT_32 *pMipDim;    // the count that gets enlarged
            UINT_32 *pOrderDim;  // the count along the major axis, compared against orderLimit

            if (major == ADDR_MAJOR_Z)
            {
                // Z major
                pMipDim = &numMetaBlkY;
                pOrderDim = &numMetaBlkZ;
                orderLimit = 4;
            }
            else if (major == ADDR_MAJOR_X)
            {
                // X major
                pMipDim = &numMetaBlkY;
                pOrderDim = &numMetaBlkX;
                orderLimit = 4;
            }
            else
            {
                // Y major
                pMipDim = &numMetaBlkX;
                pOrderDim = &numMetaBlkY;
                orderLimit = 2;
            }

            if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
            {
                // Small minor count, long major axis and more than three levels: add two meta blocks.
                *pMipDim += 2;
            }
            else
            {
                // Otherwise grow by half of the current count, rounded up.
                *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
            }
        }
    }

    if (pInfo != NULL)
    {
        UINT_32 mipWidth  = mip0Width;
        UINT_32 mipHeight = mip0Height;
        UINT_32 mipDepth  = mip0Depth;
        // Start coordinate of the current mip level; begins at the origin.
        Dim3d   mipCoord  = {0};

        for (UINT_32 mip = 0; mip < numMipLevels; mip++)
        {
            if (inTail)
            {
                // This level and all remaining ones are described by a single GetMetaMiptailInfo call.
                GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
                                   pMetaBlkDim);
                break;
            }
            else
            {
                // Align the level to whole meta blocks before recording it.
                mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
                mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
                mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);

                pInfo[mip].inMiptail = FALSE;
                pInfo[mip].startX = mipCoord.w;
                pInfo[mip].startY = mipCoord.h;
                pInfo[mip].startZ = mipCoord.d;
                pInfo[mip].width  = mipWidth;
                pInfo[mip].height = mipHeight;
                pInfo[mip].depth  = dataThick ? mipDepth : 1;

                // Advance the start coordinate for the next level: after odd levels and after every level
                // from 3 on, step along the major axis; after levels 0 and 2, step along the other axis
                // (height for X/Z major, width for Y major).
                if ((mip >= 3) || (mip & 1))
                {
                    switch (major)
                    {
                        case ADDR_MAJOR_X:
                            mipCoord.w += mipWidth;
                            break;
                        case ADDR_MAJOR_Y:
                            mipCoord.h += mipHeight;
                            break;
                        case ADDR_MAJOR_Z:
                            mipCoord.d += mipDepth;
                            break;
                        default:
                            break;
                    }
                }
                else
                {
                    switch (major)
                    {
                        case ADDR_MAJOR_X:
                            mipCoord.h += mipHeight;
                            break;
                        case ADDR_MAJOR_Y:
                            mipCoord.w += mipWidth;
                            break;
                        case ADDR_MAJOR_Z:
                            mipCoord.h += mipHeight;
                            break;
                        default:
                            break;
                    }
                }

                // Next level: halve the aligned dimensions, never dropping below 1.
                mipWidth  = Max(mipWidth >> 1, 1u);
                mipHeight = Max(mipHeight >> 1, 1u);
                mipDepth = Max(mipDepth >> 1, 1u);

                // Re-evaluate the tail test for the next level.
                inTail = ((mipWidth <= tailWidth) &&
                          (mipHeight <= tailHeight) &&
                          ((dataThick == FALSE) || (mipDepth <= tailDepth)));
            }
        }
    }

    // Report the (possibly enlarged) meta block counts.
    *pNumMetaBlkX = numMetaBlkX;
    *pNumMetaBlkY = numMetaBlkY;
    *pNumMetaBlkZ = numMetaBlkZ;
}
    528 
    529 /**
    530 ************************************************************************************************************************
    531 *   Gfx9Lib::HwlComputeDccInfo
    532 *
    533 *   @brief
    534 *       Interface function to compute DCC key info
    535 *
    536 *   @return
    537 *       ADDR_E_RETURNCODE
    538 ************************************************************************************************************************
    539 */
    540 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
    541     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
    542     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
    543     ) const
    544 {
    545     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
    546     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
    547     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
    548 
    549     if (dataLinear)
    550     {
    551         metaLinear = TRUE;
    552     }
    553     else if (metaLinear == TRUE)
    554     {
    555         pipeAligned = FALSE;
    556     }
    557 
    558     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
    559 
    560     if (metaLinear)
    561     {
    562         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
    563         ADDR_ASSERT_ALWAYS();
    564 
    565         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
    566         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
    567     }
    568     else
    569     {
    570         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
    571 
    572         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
    573 
    574         UINT_32 numFrags = Max(pIn->numFrags, 1u);
    575         UINT_32 numSlices = Max(pIn->numSlices, 1u);
    576 
    577         minMetaBlkSize /= numFrags;
    578 
    579         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
    580 
    581         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
    582 
    583         if ((numPipeTotal > 1) || (numRbTotal > 1))
    584         {
    585             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
    586 
    587             numCompressBlkPerMetaBlk =
    588                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
    589 
    590             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
    591             {
    592                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
    593             }
    594         }
    595 
    596         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
    597         Dim3d metaBlkDim = compressBlkDim;
    598 
    599         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
    600         {
    601             if ((metaBlkDim.h < metaBlkDim.w) ||
    602                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
    603             {
    604                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
    605                 {
    606                     metaBlkDim.h <<= 1;
    607                 }
    608                 else
    609                 {
    610                     metaBlkDim.d <<= 1;
    611                 }
    612             }
    613             else
    614             {
    615                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
    616                 {
    617                     metaBlkDim.w <<= 1;
    618                 }
    619                 else
    620                 {
    621                     metaBlkDim.d <<= 1;
    622                 }
    623             }
    624         }
    625 
    626         UINT_32 numMetaBlkX;
    627         UINT_32 numMetaBlkY;
    628         UINT_32 numMetaBlkZ;
    629 
    630         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
    631                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
    632                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
    633 
    634         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
    635 
    636         if (numFrags > m_maxCompFrag)
    637         {
    638             sizeAlign *= (numFrags / m_maxCompFrag);
    639         }
    640 
    641         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
    642                            numCompressBlkPerMetaBlk * numFrags;
    643         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
    644         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
    645 
    646         if (m_settings.metaBaseAlignFix)
    647         {
    648             pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
    649         }
    650 
    651         pOut->pitch = numMetaBlkX * metaBlkDim.w;
    652         pOut->height = numMetaBlkY * metaBlkDim.h;
    653         pOut->depth = numMetaBlkZ * metaBlkDim.d;
    654 
    655         pOut->compressBlkWidth = compressBlkDim.w;
    656         pOut->compressBlkHeight = compressBlkDim.h;
    657         pOut->compressBlkDepth = compressBlkDim.d;
    658 
    659         pOut->metaBlkWidth = metaBlkDim.w;
    660         pOut->metaBlkHeight = metaBlkDim.h;
    661         pOut->metaBlkDepth = metaBlkDim.d;
    662 
    663         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
    664         pOut->fastClearSizePerSlice =
    665             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
    666     }
    667 
    668     return ADDR_OK;
    669 }
    670 
    671 /**
    672 ************************************************************************************************************************
    673 *   Gfx9Lib::HwlGetMaxAlignments
    674 *
    675 *   @brief
    676 *       Gets maximum alignments
    677 *   @return
    678 *       ADDR_E_RETURNCODE
    679 ************************************************************************************************************************
    680 */
    681 ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
    682     ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut    ///< [out] output structure
    683     ) const
    684 {
    685     pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
    686 
    687     return ADDR_OK;
    688 }
    689 
    690 /**
    691 ************************************************************************************************************************
    692 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
    693 *
    694 *   @brief
    695 *       Interface function stub of AddrComputeCmaskAddrFromCoord
    696 *
    697 *   @return
    698 *       ADDR_E_RETURNCODE
    699 ************************************************************************************************************************
    700 */
    701 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
    702     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    703     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
    704 {
    705     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
    706     input.size            = sizeof(input);
    707     input.cMaskFlags      = pIn->cMaskFlags;
    708     input.colorFlags      = pIn->colorFlags;
    709     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
    710     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    711     input.numSlices       = Max(pIn->numSlices, 1u);
    712     input.swizzleMode     = pIn->swizzleMode;
    713     input.resourceType    = pIn->resourceType;
    714 
    715     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
    716     output.size = sizeof(output);
    717 
    718     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
    719 
    720     if (returnCode == ADDR_OK)
    721     {
    722         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
    723         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
    724         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
    725         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
    726 
    727         const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
    728                                                   Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
    729                                                   metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
    730 
    731         UINT_32 xb = pIn->x / output.metaBlkWidth;
    732         UINT_32 yb = pIn->y / output.metaBlkHeight;
    733         UINT_32 zb = pIn->slice;
    734 
    735         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
    736         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
    737         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
    738 
    739         UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
    740 
    741         pOut->addr = address >> 1;
    742         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
    743 
    744 
    745         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
    746                                                            pIn->swizzleMode);
    747 
    748         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
    749 
    750         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
    751     }
    752 
    753     return returnCode;
    754 }
    755 
    756 /**
    757 ************************************************************************************************************************
    758 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
    759 *
    760 *   @brief
    761 *       Interface function stub of AddrComputeHtileAddrFromCoord
    762 *
    763 *   @return
    764 *       ADDR_E_RETURNCODE
    765 ************************************************************************************************************************
    766 */
    767 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
    768     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    769     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
    770 {
    771     ADDR_E_RETURNCODE returnCode = ADDR_OK;
    772 
    773     if (pIn->numMipLevels > 1)
    774     {
    775         returnCode = ADDR_NOTIMPLEMENTED;
    776     }
    777     else
    778     {
    779         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
    780         input.size            = sizeof(input);
    781         input.hTileFlags      = pIn->hTileFlags;
    782         input.depthFlags      = pIn->depthflags;
    783         input.swizzleMode     = pIn->swizzleMode;
    784         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
    785         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    786         input.numSlices       = Max(pIn->numSlices, 1u);
    787         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
    788 
    789         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
    790         output.size = sizeof(output);
    791 
    792         returnCode = ComputeHtileInfo(&input, &output);
    793 
    794         if (returnCode == ADDR_OK)
    795         {
    796             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
    797             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
    798             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
    799             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
    800 
    801             const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
    802                                                       Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
    803                                                       metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
    804 
    805             UINT_32 xb = pIn->x / output.metaBlkWidth;
    806             UINT_32 yb = pIn->y / output.metaBlkHeight;
    807             UINT_32 zb = pIn->slice;
    808 
    809             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
    810             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
    811             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
    812 
    813             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
    814 
    815             pOut->addr = address >> 1;
    816 
    817             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
    818                                                                pIn->swizzleMode);
    819 
    820             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
    821 
    822             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
    823         }
    824     }
    825 
    826     return returnCode;
    827 }
    828 
    829 /**
    830 ************************************************************************************************************************
    831 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
    832 *
    833 *   @brief
    834 *       Interface function stub of AddrComputeHtileCoordFromAddr
    835 *
    836 *   @return
    837 *       ADDR_E_RETURNCODE
    838 ************************************************************************************************************************
    839 */
    840 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
    841     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
    842     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
    843 {
    844     ADDR_E_RETURNCODE returnCode = ADDR_OK;
    845 
    846     if (pIn->numMipLevels > 1)
    847     {
    848         returnCode = ADDR_NOTIMPLEMENTED;
    849     }
    850     else
    851     {
    852         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
    853         input.size            = sizeof(input);
    854         input.hTileFlags      = pIn->hTileFlags;
    855         input.swizzleMode     = pIn->swizzleMode;
    856         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
    857         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    858         input.numSlices       = Max(pIn->numSlices, 1u);
    859         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
    860 
    861         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
    862         output.size = sizeof(output);
    863 
    864         returnCode = ComputeHtileInfo(&input, &output);
    865 
    866         if (returnCode == ADDR_OK)
    867         {
    868             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
    869             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
    870             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
    871             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
    872 
    873             const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
    874                                                       Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
    875                                                       metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
    876 
    877             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
    878                                                                pIn->swizzleMode);
    879 
    880             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
    881 
    882             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
    883 
    884             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
    885             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
    886 
    887             UINT_32 x, y, z, s, m;
    888             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
    889 
    890             pOut->slice = m / sliceSizeInBlock;
    891             pOut->y     = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
    892             pOut->x     = (m % pitchInBlock) * output.metaBlkWidth + x;
    893         }
    894     }
    895 
    896     return returnCode;
    897 }
    898 
    899 /**
    900 ************************************************************************************************************************
    901 *   Gfx9Lib::HwlComputeDccAddrFromCoord
    902 *
    903 *   @brief
    904 *       Interface function stub of AddrComputeDccAddrFromCoord
    905 *
    906 *   @return
    907 *       ADDR_E_RETURNCODE
    908 ************************************************************************************************************************
    909 */
    910 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
    911     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
    912     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
    913 {
    914     ADDR_E_RETURNCODE returnCode = ADDR_OK;
    915 
    916     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
    917     {
    918         returnCode = ADDR_NOTIMPLEMENTED;
    919     }
    920     else
    921     {
    922         ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
    923         input.size            = sizeof(input);
    924         input.dccKeyFlags     = pIn->dccKeyFlags;
    925         input.colorFlags      = pIn->colorFlags;
    926         input.swizzleMode     = pIn->swizzleMode;
    927         input.resourceType    = pIn->resourceType;
    928         input.bpp             = pIn->bpp;
    929         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
    930         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    931         input.numSlices       = Max(pIn->numSlices, 1u);
    932         input.numFrags        = Max(pIn->numFrags, 1u);
    933         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
    934 
    935         ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
    936         output.size = sizeof(output);
    937 
    938         returnCode = ComputeDccInfo(&input, &output);
    939 
    940         if (returnCode == ADDR_OK)
    941         {
    942             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
    943             UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
    944             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
    945             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
    946             UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
    947             UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
    948             UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
    949             UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
    950 
    951             const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
    952                                                       Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
    953                                                       metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
    954                                                       compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2});
    955 
    956             UINT_32 xb = pIn->x / output.metaBlkWidth;
    957             UINT_32 yb = pIn->y / output.metaBlkHeight;
    958             UINT_32 zb = pIn->slice / output.metaBlkDepth;
    959 
    960             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
    961             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
    962             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
    963 
    964             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
    965 
    966             pOut->addr = address >> 1;
    967 
    968             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
    969                                                                pIn->swizzleMode);
    970 
    971             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
    972 
    973             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
    974         }
    975     }
    976 
    977     return returnCode;
    978 }
    979 
    980 /**
    981 ************************************************************************************************************************
    982 *   Gfx9Lib::HwlInitGlobalParams
    983 *
    984 *   @brief
    985 *       Initializes global parameters
    986 *
    987 *   @return
    988 *       TRUE if all settings are valid
    989 *
    990 ************************************************************************************************************************
    991 */
    992 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
    993     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
    994 {
    995     BOOL_32 valid = TRUE;
    996 
    997     if (m_settings.isArcticIsland)
    998     {
    999         GB_ADDR_CONFIG gbAddrConfig;
   1000 
   1001         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
   1002 
   1003         // These values are copied from CModel code
   1004         switch (gbAddrConfig.bits.NUM_PIPES)
   1005         {
   1006             case ADDR_CONFIG_1_PIPE:
   1007                 m_pipes = 1;
   1008                 m_pipesLog2 = 0;
   1009                 break;
   1010             case ADDR_CONFIG_2_PIPE:
   1011                 m_pipes = 2;
   1012                 m_pipesLog2 = 1;
   1013                 break;
   1014             case ADDR_CONFIG_4_PIPE:
   1015                 m_pipes = 4;
   1016                 m_pipesLog2 = 2;
   1017                 break;
   1018             case ADDR_CONFIG_8_PIPE:
   1019                 m_pipes = 8;
   1020                 m_pipesLog2 = 3;
   1021                 break;
   1022             case ADDR_CONFIG_16_PIPE:
   1023                 m_pipes = 16;
   1024                 m_pipesLog2 = 4;
   1025                 break;
   1026             case ADDR_CONFIG_32_PIPE:
   1027                 m_pipes = 32;
   1028                 m_pipesLog2 = 5;
   1029                 break;
   1030             default:
   1031                 ADDR_ASSERT_ALWAYS();
   1032                 break;
   1033         }
   1034 
   1035         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
   1036         {
   1037             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
   1038                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
   1039                 m_pipeInterleaveLog2 = 8;
   1040                 break;
   1041             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
   1042                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
   1043                 m_pipeInterleaveLog2 = 9;
   1044                 break;
   1045             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
   1046                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
   1047                 m_pipeInterleaveLog2 = 10;
   1048                 break;
   1049             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
   1050                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
   1051                 m_pipeInterleaveLog2 = 11;
   1052                 break;
   1053             default:
   1054                 ADDR_ASSERT_ALWAYS();
   1055                 break;
   1056         }
   1057 
   1058         switch (gbAddrConfig.bits.NUM_BANKS)
   1059         {
   1060             case ADDR_CONFIG_1_BANK:
   1061                 m_banks = 1;
   1062                 m_banksLog2 = 0;
   1063                 break;
   1064             case ADDR_CONFIG_2_BANK:
   1065                 m_banks = 2;
   1066                 m_banksLog2 = 1;
   1067                 break;
   1068             case ADDR_CONFIG_4_BANK:
   1069                 m_banks = 4;
   1070                 m_banksLog2 = 2;
   1071                 break;
   1072             case ADDR_CONFIG_8_BANK:
   1073                 m_banks = 8;
   1074                 m_banksLog2 = 3;
   1075                 break;
   1076             case ADDR_CONFIG_16_BANK:
   1077                 m_banks = 16;
   1078                 m_banksLog2 = 4;
   1079                 break;
   1080             default:
   1081                 ADDR_ASSERT_ALWAYS();
   1082                 break;
   1083         }
   1084 
   1085         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
   1086         {
   1087             case ADDR_CONFIG_1_SHADER_ENGINE:
   1088                 m_se = 1;
   1089                 m_seLog2 = 0;
   1090                 break;
   1091             case ADDR_CONFIG_2_SHADER_ENGINE:
   1092                 m_se = 2;
   1093                 m_seLog2 = 1;
   1094                 break;
   1095             case ADDR_CONFIG_4_SHADER_ENGINE:
   1096                 m_se = 4;
   1097                 m_seLog2 = 2;
   1098                 break;
   1099             case ADDR_CONFIG_8_SHADER_ENGINE:
   1100                 m_se = 8;
   1101                 m_seLog2 = 3;
   1102                 break;
   1103             default:
   1104                 ADDR_ASSERT_ALWAYS();
   1105                 break;
   1106         }
   1107 
   1108         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
   1109         {
   1110             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
   1111                 m_rbPerSe = 1;
   1112                 m_rbPerSeLog2 = 0;
   1113                 break;
   1114             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
   1115                 m_rbPerSe = 2;
   1116                 m_rbPerSeLog2 = 1;
   1117                 break;
   1118             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
   1119                 m_rbPerSe = 4;
   1120                 m_rbPerSeLog2 = 2;
   1121                 break;
   1122             default:
   1123                 ADDR_ASSERT_ALWAYS();
   1124                 break;
   1125         }
   1126 
   1127         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
   1128         {
   1129             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
   1130                 m_maxCompFrag = 1;
   1131                 m_maxCompFragLog2 = 0;
   1132                 break;
   1133             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
   1134                 m_maxCompFrag = 2;
   1135                 m_maxCompFragLog2 = 1;
   1136                 break;
   1137             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
   1138                 m_maxCompFrag = 4;
   1139                 m_maxCompFragLog2 = 2;
   1140                 break;
   1141             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
   1142                 m_maxCompFrag = 8;
   1143                 m_maxCompFragLog2 = 3;
   1144                 break;
   1145             default:
   1146                 ADDR_ASSERT_ALWAYS();
   1147                 break;
   1148         }
   1149 
   1150         m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
   1151         ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
   1152                     ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
   1153         m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
   1154     }
   1155     else
   1156     {
   1157         valid = FALSE;
   1158         ADDR_NOT_IMPLEMENTED();
   1159     }
   1160 
   1161     if (valid)
   1162     {
   1163         InitEquationTable();
   1164     }
   1165 
   1166     return valid;
   1167 }
   1168 
   1169 /**
   1170 ************************************************************************************************************************
   1171 *   Gfx9Lib::HwlConvertChipFamily
   1172 *
   1173 *   @brief
   1174 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
   1175 *   @return
   1176 *       ChipFamily
   1177 ************************************************************************************************************************
   1178 */
   1179 ChipFamily Gfx9Lib::HwlConvertChipFamily(
   1180     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
   1181     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
   1182 {
   1183     ChipFamily family = ADDR_CHIP_FAMILY_AI;
   1184 
   1185     switch (uChipFamily)
   1186     {
   1187         case FAMILY_AI:
   1188             m_settings.isArcticIsland = 1;
   1189             m_settings.isVega10    = ASICREV_IS_VEGA10_P(uChipRevision);
   1190 
   1191             m_settings.isDce12 = 1;
   1192 
   1193             if (m_settings.isVega10 == 0)
   1194             {
   1195                 m_settings.htileAlignFix = 1;
   1196                 m_settings.applyAliasFix = 1;
   1197             }
   1198 
   1199             m_settings.metaBaseAlignFix = 1;
   1200 
   1201             m_settings.depthPipeXorDisable = 1;
   1202             break;
   1203         case FAMILY_RV:
   1204             m_settings.isArcticIsland = 1;
   1205             m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
   1206 
   1207             if (m_settings.isRaven)
   1208             {
   1209                 m_settings.isDcn1   = 1;
   1210             }
   1211 
   1212             m_settings.metaBaseAlignFix = 1;
   1213 
   1214             if (ASICREV_IS_RAVEN(uChipRevision))
   1215             {
   1216                 m_settings.depthPipeXorDisable = 1;
   1217             }
   1218             break;
   1219 
   1220         default:
   1221             ADDR_ASSERT(!"This should be a Fusion");
   1222             break;
   1223     }
   1224 
   1225     return family;
   1226 }
   1227 
   1228 /**
   1229 ************************************************************************************************************************
   1230 *   Gfx9Lib::InitRbEquation
   1231 *
   1232 *   @brief
   1233 *       Init RB equation
   1234 *   @return
   1235 *       N/A
   1236 ************************************************************************************************************************
   1237 */
   1238 VOID Gfx9Lib::GetRbEquation(
   1239     CoordEq* pRbEq,             ///< [out] rb equation
   1240     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
   1241     UINT_32  numSeLog2)         ///< [in] number of shader engine
   1242     const
   1243 {
   1244     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
   1245     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
   1246     Coordinate cx('x', rbRegion);
   1247     Coordinate cy('y', rbRegion);
   1248 
   1249     UINT_32 start = 0;
   1250     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
   1251 
   1252     // Clear the rb equation
   1253     pRbEq->resize(0);
   1254     pRbEq->resize(numRbTotalLog2);
   1255 
   1256     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
   1257     {
   1258         // Special case when more than 1 SE, and 2 RB per SE
   1259         (*pRbEq)[0].add(cx);
   1260         (*pRbEq)[0].add(cy);
   1261         cx++;
   1262         cy++;
   1263 
   1264         if (m_settings.applyAliasFix == false)
   1265         {
   1266             (*pRbEq)[0].add(cy);
   1267         }
   1268 
   1269         (*pRbEq)[0].add(cy);
   1270         start++;
   1271     }
   1272 
   1273     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
   1274 
   1275     for (UINT_32 i = 0; i < numBits; i++)
   1276     {
   1277         UINT_32 idx =
   1278             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
   1279 
   1280         if ((i % 2) == 1)
   1281         {
   1282             (*pRbEq)[idx].add(cx);
   1283             cx++;
   1284         }
   1285         else
   1286         {
   1287             (*pRbEq)[idx].add(cy);
   1288             cy++;
   1289         }
   1290     }
   1291 }
   1292 
   1293 /**
   1294 ************************************************************************************************************************
   1295 *   Gfx9Lib::GetDataEquation
   1296 *
   1297 *   @brief
   1298 *       Get data equation for fmask and Z
        *
        *       Note: the Gfx9DataColor surface type is handled here as well (linear, thick and thin swizzles);
        *       every other surface type takes the "Fmask or depth" path at the end of the function.
        *       The equation is cleared and resized to 27 bits (49 bits for linear color) before being filled.
   1299 *   @return
   1300 *       N/A
   1301 ************************************************************************************************************************
   1302 */
   1303 VOID Gfx9Lib::GetDataEquation(
   1304     CoordEq* pDataEq,               ///< [out] data surface equation
   1305     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
   1306     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
   1307     AddrResourceType resourceType,  ///< [in] data surface resource type
   1308     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
   1309     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
   1310     const
   1311 {
            // Running coordinate terms; each one is advanced (++) after it has been placed in an equation bit,
            // so the order of the statements below determines which coordinate bit lands in which address bit.
   1312     Coordinate cx('x', 0);
   1313     Coordinate cy('y', 0);
   1314     Coordinate cz('z', 0);
   1315     Coordinate cs('s', 0);
   1316 
   1317     // Clear the equation
   1318     pDataEq->resize(0);
   1319     pDataEq->resize(27);
   1320 
   1321     if (dataSurfaceType == Gfx9DataColor)
   1322     {
   1323         if (IsLinear(swizzleMode))
   1324         {
                    // Linear: equation bit i is simply bit i of the 'm' coordinate, for 49 bits.
   1325             Coordinate cm('m', 0);
   1326 
   1327             pDataEq->resize(49);
   1328 
   1329             for (UINT_32 i = 0; i < 49; i++)
   1330             {
   1331                 (*pDataEq)[i].add(cm);
   1332                 cm++;
   1333             }
   1334         }
   1335         else if (IsThick(resourceType, swizzleMode))
   1336         {
   1337             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
   1338             UINT_32 i;
   1339             if (IsStandardSwizzle(resourceType, swizzleMode))
   1340             {
   1341                 // Standard 3d swizzle
   1342                 // Fill in bottom x bits
   1343                 for (i = elementBytesLog2; i < 4; i++)
   1344                 {
   1345                     (*pDataEq)[i].add(cx);
   1346                     cx++;
   1347                 }
   1348                 // Fill in 2 bits of y and then z
   1349                 for (i = 4; i < 6; i++)
   1350                 {
   1351                     (*pDataEq)[i].add(cy);
   1352                     cy++;
   1353                 }
   1354                 for (i = 6; i < 8; i++)
   1355                 {
   1356                     (*pDataEq)[i].add(cz);
   1357                     cz++;
   1358                 }
                        // Bits 8 and 9 depend on the element size.
   1359                 if (elementBytesLog2 < 2)
   1360                 {
   1361                     // fill in z & y bit
   1362                     (*pDataEq)[8].add(cz);
   1363                     (*pDataEq)[9].add(cy);
   1364                     cz++;
   1365                     cy++;
   1366                 }
   1367                 else if (elementBytesLog2 == 2)
   1368                 {
   1369                     // fill in y and x bit
   1370                     (*pDataEq)[8].add(cy);
   1371                     (*pDataEq)[9].add(cx);
   1372                     cy++;
   1373                     cx++;
   1374                 }
   1375                 else
   1376                 {
   1377                     // fill in 2 x bits
   1378                     (*pDataEq)[8].add(cx);
   1379                     cx++;
   1380                     (*pDataEq)[9].add(cx);
   1381                     cx++;
   1382                 }
   1383             }
   1384             else
   1385             {
   1386                 // Z 3d swizzle
                        // m2dEnd is the end bit handed to mort2d; numZs z bits are placed directly above it.
   1387                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
   1388                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
   1389                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
   1390                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
   1391                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
   1392                 {
   1393                     (*pDataEq)[i].add(cz);
   1394                     cz++;
   1395                 }
                        // Bits 6 and 7 are only filled here for element sizes 0, 2 and 3 (log2).
   1396                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
   1397                 {
   1398                     // add an x and z
   1399                     (*pDataEq)[6].add(cx);
   1400                     (*pDataEq)[7].add(cz);
   1401                     cx++;
   1402                     cz++;
   1403                 }
   1404                 else if (elementBytesLog2 == 2)
   1405                 {
   1406                     // add a y and z
   1407                     (*pDataEq)[6].add(cy);
   1408                     (*pDataEq)[7].add(cz);
   1409                     cy++;
   1410                     cz++;
   1411                 }
   1412                 // add y and x
   1413                 (*pDataEq)[8].add(cy);
   1414                 (*pDataEq)[9].add(cx);
   1415                 cy++;
   1416                 cx++;
   1417             }
   1418             // Fill in bit 10 and up
   1419             pDataEq->mort3d( cz, cy, cx, 10 );
   1420         }
   1421         else if (IsThin(resourceType, swizzleMode))
   1422         {
   1423             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
   1424             // Color 2D
                    // Sample bits occupy equation bits [tileSplitStart, blockSizeLog2); x/y bits go below and above.
   1425             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
   1426             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
   1427             UINT_32 i;
   1428             // Fill in bottom x bits
   1429             for (i = elementBytesLog2; i < 4; i++)
   1430             {
   1431                 (*pDataEq)[i].add(cx);
   1432                 cx++;
   1433             }
   1434             // Fill in bottom y bits
   1435             for (i = 4; i < 4 + microYBits; i++)
   1436             {
   1437                 (*pDataEq)[i].add(cy);
   1438                 cy++;
   1439             }
   1440             // Fill in last of the micro_x bits
   1441             for (i = 4 + microYBits; i < 8; i++)
   1442             {
   1443                 (*pDataEq)[i].add(cx);
   1444                 cx++;
   1445             }
   1446             // Fill in x/y bits below sample split
   1447             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
   1448             // Fill in sample bits
   1449             for (i = 0; i < numSamplesLog2; i++)
   1450             {
   1451                 cs.set('s', i);
   1452                 (*pDataEq)[tileSplitStart + i].add(cs);
   1453             }
   1454             // Fill in x/y bits above sample split
                    // Which coordinate leads above the split depends on the parity of sample count vs block size.
   1455             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
   1456             {
   1457                 pDataEq->mort2d(cx, cy, blockSizeLog2);
   1458             }
   1459             else
   1460             {
   1461                 pDataEq->mort2d(cy, cx, blockSizeLog2);
   1462             }
   1463         }
   1464         else
   1465         {
   1466             ADDR_ASSERT_ALWAYS();
   1467         }
   1468     }
   1469     else
   1470     {
   1471         // Fmask or depth
                // From the low bits up: element bytes, then sample bits, then pixel bits; bits from
                // (6 + numSamplesLog2) upward use the y-major ordering.
   1472         UINT_32 sampleStart = elementBytesLog2;
   1473         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
   1474         UINT_32 ymajStart = 6 + numSamplesLog2;
   1475 
   1476         for (UINT_32 s = 0; s < numSamplesLog2; s++)
   1477         {
   1478             cs.set('s', s);
   1479             (*pDataEq)[sampleStart + s].add(cs);
   1480         }
   1481 
   1482         // Put in the x-major order pixel bits
   1483         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
   1484         // Put in the y-major order pixel bits
   1485         pDataEq->mort2d(cy, cx, ymajStart);
   1486     }
   1487 }
   1488 
   1489 /**
   1490 ************************************************************************************************************************
   1491 *   Gfx9Lib::GetPipeEquation
   1492 *
   1493 *   @brief
   1494 *       Get pipe equation
   1495 *   @return
   1496 *       N/A
   1497 ************************************************************************************************************************
   1498 */
   1499 VOID Gfx9Lib::GetPipeEquation(
   1500     CoordEq*         pPipeEq,            ///< [out] pipe equation
   1501     CoordEq*         pDataEq,            ///< [in] data equation
   1502     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
   1503     UINT_32          numPipeLog2,        ///< [in] number of pipes
   1504     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
   1505     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
   1506     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
   1507     AddrResourceType resourceType        ///< [in] data surface resource type
   1508     ) const
   1509 {
   1510     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
   1511     CoordEq dataEq;
   1512 
   1513     pDataEq->copy(dataEq);
   1514 
   1515     if (dataSurfaceType == Gfx9DataColor)
   1516     {
   1517         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
   1518         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
   1519     }
   1520 
   1521     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
   1522 
   1523     // This section should only apply to z/stencil, maybe fmask
   1524     // If the pipe bit is below the comp block size,
   1525     // then keep moving up the address until we find a bit that is above
   1526     UINT_32 pipeStart = 0;
   1527 
   1528     if (dataSurfaceType != Gfx9DataColor)
   1529     {
   1530         Coordinate tileMin('x', 3);
   1531 
   1532         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
   1533         {
   1534             pipeStart++;
   1535         }
   1536 
   1537         // if pipe is 0, then the first pipe bit is above the comp block size,
   1538         // so we don't need to do anything
   1539         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
   1540         // we will get the same pipe equation
   1541         if (pipeStart != 0)
   1542         {
   1543             for (UINT_32 i = 0; i < numPipeLog2; i++)
   1544             {
   1545                 // Copy the jth bit above pipe interleave to the current pipe equation bit
   1546                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
   1547             }
   1548         }
   1549     }
   1550 
   1551     if (IsPrt(swizzleMode))
   1552     {
   1553         // Clear out bits above the block size if prt's are enabled
   1554         dataEq.resize(blockSizeLog2);
   1555         dataEq.resize(48);
   1556     }
   1557 
   1558     if (IsXor(swizzleMode))
   1559     {
   1560         CoordEq xorMask;
   1561 
   1562         if (IsThick(resourceType, swizzleMode))
   1563         {
   1564             CoordEq xorMask2;
   1565 
   1566             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
   1567 
   1568             xorMask.resize(numPipeLog2);
   1569 
   1570             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
   1571             {
   1572                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
   1573                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
   1574             }
   1575         }
   1576         else
   1577         {
   1578             // Xor in the bits above the pipe+gpu bits
   1579             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
   1580 
   1581             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
   1582             {
   1583                 Coordinate co;
   1584                 CoordEq xorMask2;
   1585                 // if 1xaa and not prt, then xor in the z bits
   1586                 xorMask2.resize(0);
   1587                 xorMask2.resize(numPipeLog2);
   1588                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
   1589                 {
   1590                     co.set('z', numPipeLog2 - 1 - pipeIdx);
   1591                     xorMask2[pipeIdx].add(co);
   1592                 }
   1593 
   1594                 pPipeEq->xorin(xorMask2);
   1595             }
   1596         }
   1597 
   1598         xorMask.reverse();
   1599         pPipeEq->xorin(xorMask);
   1600     }
   1601 }
   1602 /**
   1603 ************************************************************************************************************************
   1604 *   Gfx9Lib::GetMetaEquation
   1605 *
   1606 *   @brief
   1607 *       Get meta equation for cmask/htile/DCC
   1608 *   @return
   1609 *       Pointer to a calculated meta equation
   1610 ************************************************************************************************************************
   1611 */
   1612 const CoordEq* Gfx9Lib::GetMetaEquation(
   1613     const MetaEqParams& metaEqParams)
   1614 {
   1615     UINT_32 cachedMetaEqIndex;
   1616 
   1617     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
   1618     {
   1619         if (memcmp(&metaEqParams,
   1620                    &m_cachedMetaEqKey[cachedMetaEqIndex],
   1621                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
   1622         {
   1623             break;
   1624         }
   1625     }
   1626 
   1627     CoordEq* pMetaEq = NULL;
   1628 
   1629     if (cachedMetaEqIndex < MaxCachedMetaEq)
   1630     {
   1631         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
   1632     }
   1633     else
   1634     {
   1635         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
   1636 
   1637         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
   1638 
   1639         m_metaEqOverrideIndex %= MaxCachedMetaEq;
   1640 
   1641         GenMetaEquation(pMetaEq,
   1642                         metaEqParams.maxMip,
   1643                         metaEqParams.elementBytesLog2,
   1644                         metaEqParams.numSamplesLog2,
   1645                         metaEqParams.metaFlag,
   1646                         metaEqParams.dataSurfaceType,
   1647                         metaEqParams.swizzleMode,
   1648                         metaEqParams.resourceType,
   1649                         metaEqParams.metaBlkWidthLog2,
   1650                         metaEqParams.metaBlkHeightLog2,
   1651                         metaEqParams.metaBlkDepthLog2,
   1652                         metaEqParams.compBlkWidthLog2,
   1653                         metaEqParams.compBlkHeightLog2,
   1654                         metaEqParams.compBlkDepthLog2);
   1655     }
   1656 
   1657     return pMetaEq;
   1658 }
   1659 
   1660 /**
   1661 ************************************************************************************************************************
   1662 *   Gfx9Lib::GenMetaEquation
   1663 *
   1664 *   @brief
   1665 *       Get meta equation for cmask/htile/DCC
   1666 *   @return
   1667 *       N/A
   1668 ************************************************************************************************************************
   1669 */
   1670 VOID Gfx9Lib::GenMetaEquation(
   1671     CoordEq*         pMetaEq,               ///< [out] meta equation
   1672     UINT_32          maxMip,                ///< [in] max mip Id
   1673     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
   1674     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
   1675     ADDR2_META_FLAGS metaFlag,              ///< [in] meta falg
   1676     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
   1677     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
   1678     AddrResourceType resourceType,          ///< [in] data surface resource type
   1679     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
   1680     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
   1681     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
   1682     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
   1683     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
   1684     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
   1685     const
   1686 {
   1687     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
   1688     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
   1689 
   1690     // Get the correct data address and rb equation
   1691     CoordEq dataEq;
   1692     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
   1693                     elementBytesLog2, numSamplesLog2);
   1694 
   1695     // Get pipe and rb equations
   1696     CoordEq pipeEquation;
   1697     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
   1698                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
   1699     numPipeTotalLog2 = pipeEquation.getsize();
   1700 
   1701     if (metaFlag.linear)
   1702     {
   1703         // Linear metadata supporting was removed for GFX9! No one can use this feature.
   1704         ADDR_ASSERT_ALWAYS();
   1705 
   1706         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
   1707 
   1708         dataEq.copy(*pMetaEq);
   1709 
   1710         if (IsLinear(swizzleMode))
   1711         {
   1712             if (metaFlag.pipeAligned)
   1713             {
   1714                 // Remove the pipe bits
   1715                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
   1716                 pMetaEq->shift(-shift, pipeInterleaveLog2);
   1717             }
   1718             // Divide by comp block size, which for linear (which is always color) is 256 B
   1719             pMetaEq->shift(-8);
   1720 
   1721             if (metaFlag.pipeAligned)
   1722             {
   1723                 // Put pipe bits back in
   1724                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
   1725 
   1726                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
   1727                 {
   1728                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
   1729                 }
   1730             }
   1731         }
   1732 
   1733         pMetaEq->shift(1);
   1734     }
   1735     else
   1736     {
   1737         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
   1738         UINT_32 compFragLog2 =
   1739             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
   1740             maxCompFragLog2 : numSamplesLog2;
   1741 
   1742         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
   1743 
   1744         // Make sure the metaaddr is cleared
   1745         pMetaEq->resize(0);
   1746         pMetaEq->resize(27);
   1747 
   1748         if (IsThick(resourceType, swizzleMode))
   1749         {
   1750             Coordinate cx('x', 0);
   1751             Coordinate cy('y', 0);
   1752             Coordinate cz('z', 0);
   1753 
   1754             if (maxMip > 0)
   1755             {
   1756                 pMetaEq->mort3d(cy, cx, cz);
   1757             }
   1758             else
   1759             {
   1760                 pMetaEq->mort3d(cx, cy, cz);
   1761             }
   1762         }
   1763         else
   1764         {
   1765             Coordinate cx('x', 0);
   1766             Coordinate cy('y', 0);
   1767             Coordinate cs;
   1768 
   1769             if (maxMip > 0)
   1770             {
   1771                 pMetaEq->mort2d(cy, cx, compFragLog2);
   1772             }
   1773             else
   1774             {
   1775                 pMetaEq->mort2d(cx, cy, compFragLog2);
   1776             }
   1777 
   1778             //------------------------------------------------------------------------------------------------------------------------
   1779             // Put the compressible fragments at the lsb
   1780             // the uncompressible frags will be at the msb of the micro address
   1781             //------------------------------------------------------------------------------------------------------------------------
   1782             for (UINT_32 s = 0; s < compFragLog2; s++)
   1783             {
   1784                 cs.set('s', s);
   1785                 (*pMetaEq)[s].add(cs);
   1786             }
   1787         }
   1788 
   1789         // Keep a copy of the pipe equations
   1790         CoordEq origPipeEquation;
   1791         pipeEquation.copy(origPipeEquation);
   1792 
   1793         Coordinate co;
   1794         // filter out everything under the compressed block size
   1795         co.set('x', compBlkWidthLog2);
   1796         pMetaEq->Filter('<', co, 0, 'x');
   1797         co.set('y', compBlkHeightLog2);
   1798         pMetaEq->Filter('<', co, 0, 'y');
   1799         co.set('z', compBlkDepthLog2);
   1800         pMetaEq->Filter('<', co, 0, 'z');
   1801 
   1802         // For non-color, filter out sample bits
   1803         if (dataSurfaceType != Gfx9DataColor)
   1804         {
   1805             co.set('x', 0);
   1806             pMetaEq->Filter('<', co, 0, 's');
   1807         }
   1808 
   1809         // filter out everything above the metablock size
   1810         co.set('x', metaBlkWidthLog2 - 1);
   1811         pMetaEq->Filter('>', co, 0, 'x');
   1812         co.set('y', metaBlkHeightLog2 - 1);
   1813         pMetaEq->Filter('>', co, 0, 'y');
   1814         co.set('z', metaBlkDepthLog2 - 1);
   1815         pMetaEq->Filter('>', co, 0, 'z');
   1816 
   1817         // filter out everything above the metablock size for the channel bits
   1818         co.set('x', metaBlkWidthLog2 - 1);
   1819         pipeEquation.Filter('>', co, 0, 'x');
   1820         co.set('y', metaBlkHeightLog2 - 1);
   1821         pipeEquation.Filter('>', co, 0, 'y');
   1822         co.set('z', metaBlkDepthLog2 - 1);
   1823         pipeEquation.Filter('>', co, 0, 'z');
   1824 
   1825         // Make sure we still have the same number of channel bits
   1826         if (pipeEquation.getsize() != numPipeTotalLog2)
   1827         {
   1828             ADDR_ASSERT_ALWAYS();
   1829         }
   1830 
   1831         // Loop through all channel and rb bits,
   1832         // and make sure these components exist in the metadata address
   1833         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
   1834         {
   1835             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
   1836             {
   1837                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
   1838                 {
   1839                     ADDR_ASSERT_ALWAYS();
   1840                 }
   1841             }
   1842         }
   1843 
   1844         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
   1845         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
   1846         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
   1847         CoordEq       origRbEquation;
   1848 
   1849         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
   1850 
   1851         CoordEq rbEquation = origRbEquation;
   1852 
   1853         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
   1854         {
   1855             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
   1856             {
   1857                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
   1858                 {
   1859                     ADDR_ASSERT_ALWAYS();
   1860                 }
   1861             }
   1862         }
   1863 
   1864         if (m_settings.applyAliasFix)
   1865         {
   1866             co.set('z', -1);
   1867         }
   1868 
   1869         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
   1870         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
   1871         {
   1872             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
   1873             {
   1874                 BOOL_32 isRbEquationInPipeEquation = FALSE;
   1875 
   1876                 if (m_settings.applyAliasFix)
   1877                 {
   1878                     CoordTerm filteredPipeEq;
   1879                     filteredPipeEq = pipeEquation[j];
   1880 
   1881                     filteredPipeEq.Filter('>', co, 0, 'z');
   1882 
   1883                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
   1884                 }
   1885                 else
   1886                 {
   1887                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
   1888                 }
   1889 
   1890                 if (isRbEquationInPipeEquation)
   1891                 {
   1892                     rbEquation[i].Clear();
   1893                 }
   1894             }
   1895         }
   1896 
   1897          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
   1898 
   1899         // Loop through each bit of the channel, get the smallest coordinate,
   1900         // and remove it from the metaaddr, and rb_equation
   1901         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
   1902         {
   1903             pipeEquation[i].getsmallest(co);
   1904 
   1905             UINT_32 old_size = pMetaEq->getsize();
   1906             pMetaEq->Filter('=', co);
   1907             UINT_32 new_size = pMetaEq->getsize();
   1908             if (new_size != old_size-1)
   1909             {
   1910                 ADDR_ASSERT_ALWAYS();
   1911             }
   1912             pipeEquation.remove(co);
   1913             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
   1914             {
   1915                 if (rbEquation[j].remove(co))
   1916                 {
   1917                     // if we actually removed something from this bit, then add the remaining
   1918                     // channel bits, as these can be removed for this bit
   1919                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
   1920                     {
   1921                         if (pipeEquation[i][k] != co)
   1922                         {
   1923                             rbEquation[j].add(pipeEquation[i][k]);
   1924                             rbAppendedWithPipeBits[j] = true;
   1925                         }
   1926                     }
   1927                 }
   1928             }
   1929         }
   1930 
   1931         // Loop through the rb bits and see what remain;
   1932         // filter out the smallest coordinate if it remains
   1933         UINT_32 rbBitsLeft = 0;
   1934         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
   1935         {
   1936             BOOL_32 isRbEqAppended = FALSE;
   1937 
   1938             if (m_settings.applyAliasFix)
   1939             {
   1940                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
   1941             }
   1942             else
   1943             {
   1944                 isRbEqAppended = (rbEquation[i].getsize() > 0);
   1945             }
   1946 
   1947             if (isRbEqAppended)
   1948             {
   1949                 rbBitsLeft++;
   1950                 rbEquation[i].getsmallest(co);
   1951                 UINT_32 old_size = pMetaEq->getsize();
   1952                 pMetaEq->Filter('=', co);
   1953                 UINT_32 new_size = pMetaEq->getsize();
   1954                 if (new_size != old_size - 1)
   1955                 {
   1956                     // assert warning
   1957                 }
   1958                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
   1959                 {
   1960                     if (rbEquation[j].remove(co))
   1961                     {
   1962                         // if we actually removed something from this bit, then add the remaining
   1963                         // rb bits, as these can be removed for this bit
   1964                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
   1965                         {
   1966                             if (rbEquation[i][k] != co)
   1967                             {
   1968                                 rbEquation[j].add(rbEquation[i][k]);
   1969                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
   1970                             }
   1971                         }
   1972                     }
   1973                 }
   1974             }
   1975         }
   1976 
   1977         // capture the size of the metaaddr
   1978         UINT_32 metaSize = pMetaEq->getsize();
   1979         // resize to 49 bits...make this a nibble address
   1980         pMetaEq->resize(49);
   1981         // Concatenate the macro address above the current address
   1982         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
   1983         {
   1984             co.set('m', j);
   1985             (*pMetaEq)[i].add(co);
   1986         }
   1987 
   1988         // Multiply by meta element size (in nibbles)
   1989         if (dataSurfaceType == Gfx9DataColor)
   1990         {
   1991             pMetaEq->shift(1);
   1992         }
   1993         else if (dataSurfaceType == Gfx9DataDepthStencil)
   1994         {
   1995             pMetaEq->shift(3);
   1996         }
   1997 
   1998         //------------------------------------------------------------------------------------------
   1999         // Note the pipeInterleaveLog2+1 is because address is a nibble address
   2000         // Shift up from pipe interleave number of channel
   2001         // and rb bits left, and uncompressed fragments
   2002         //------------------------------------------------------------------------------------------
   2003 
   2004         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
   2005 
   2006         // Put in the channel bits
   2007         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
   2008         {
   2009             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
   2010         }
   2011 
   2012         // Put in remaining rb bits
   2013         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
   2014         {
   2015             BOOL_32 isRbEqAppended = FALSE;
   2016 
   2017             if (m_settings.applyAliasFix)
   2018             {
   2019                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
   2020             }
   2021             else
   2022             {
   2023                 isRbEqAppended = (rbEquation[i].getsize() > 0);
   2024             }
   2025 
   2026             if (isRbEqAppended)
   2027             {
   2028                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
   2029                 // Mark any rb bit we add in to the rb mask
   2030                 j++;
   2031             }
   2032         }
   2033 
   2034         //------------------------------------------------------------------------------------------
   2035         // Put in the uncompressed fragment bits
   2036         //------------------------------------------------------------------------------------------
   2037         for (UINT_32 i = 0; i < uncompFragLog2; i++)
   2038         {
   2039             co.set('s', compFragLog2 + i);
   2040             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
   2041         }
   2042     }
   2043 }
   2044 
   2045 /**
   2046 ************************************************************************************************************************
   2047 *   Gfx9Lib::IsEquationSupported
   2048 *
   2049 *   @brief
   2050 *       Check if equation is supported for given swizzle mode and resource type.
   2051 *
   2052 *   @return
   2053 *       TRUE if supported
   2054 ************************************************************************************************************************
   2055 */
   2056 BOOL_32 Gfx9Lib::IsEquationSupported(
   2057     AddrResourceType rsrcType,
   2058     AddrSwizzleMode  swMode,
   2059     UINT_32          elementBytesLog2) const
   2060 {
   2061     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
   2062                         (IsLinear(swMode) == FALSE) &&
   2063                         (((IsTex2d(rsrcType) == TRUE) &&
   2064                           ((elementBytesLog2 < 4) ||
   2065                            ((IsRotateSwizzle(swMode) == FALSE) &&
   2066                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
   2067                          ((IsTex3d(rsrcType) == TRUE) &&
   2068                           (IsRotateSwizzle(swMode) == FALSE) &&
   2069                           (IsBlock256b(swMode) == FALSE)));
   2070 
   2071     return supported;
   2072 }
   2073 
   2074 /**
   2075 ************************************************************************************************************************
   2076 *   Gfx9Lib::InitEquationTable
   2077 *
   2078 *   @brief
   2079 *       Initialize Equation table.
   2080 *
   2081 *   @return
   2082 *       N/A
   2083 ************************************************************************************************************************
   2084 */
   2085 VOID Gfx9Lib::InitEquationTable()
   2086 {
   2087     memset(m_equationTable, 0, sizeof(m_equationTable));
   2088 
   2089     // Loop all possible resource type (2D/3D)
   2090     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
   2091     {
   2092         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
   2093 
   2094         // Loop all possible swizzle mode
   2095         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
   2096         {
   2097             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
   2098 
   2099             // Loop all possible bpp
   2100             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
   2101             {
   2102                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
   2103 
   2104                 // Check if the input is supported
   2105                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
   2106                 {
   2107                     ADDR_EQUATION equation;
   2108                     ADDR_E_RETURNCODE retCode;
   2109 
   2110                     memset(&equation, 0, sizeof(ADDR_EQUATION));
   2111 
   2112                     // Generate the equation
   2113                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
   2114                     {
   2115                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
   2116                     }
   2117                     else if (IsThin(rsrcType, swMode))
   2118                     {
   2119                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
   2120                     }
   2121                     else
   2122                     {
   2123                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
   2124                     }
   2125 
   2126                     // Only fill the equation into the table if the return code is ADDR_OK,
   2127                     // otherwise if the return code is not ADDR_OK, it indicates this is not
   2128                     // a valid input, we do nothing but just fill invalid equation index
   2129                     // into the lookup table.
   2130                     if (retCode == ADDR_OK)
   2131                     {
   2132                         equationIndex = m_numEquations;
   2133                         ADDR_ASSERT(equationIndex < EquationTableSize);
   2134 
   2135                         m_equationTable[equationIndex] = equation;
   2136 
   2137                         m_numEquations++;
   2138                     }
   2139                     else
   2140                     {
   2141                         ADDR_ASSERT_ALWAYS();
   2142                     }
   2143                 }
   2144 
   2145                 // Fill the index into the lookup table, if the combination is not supported
   2146                 // fill the invalid equation index
   2147                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
   2148             }
   2149         }
   2150     }
   2151 }
   2152 
   2153 /**
   2154 ************************************************************************************************************************
   2155 *   Gfx9Lib::HwlGetEquationIndex
   2156 *
   2157 *   @brief
   2158 *       Interface function stub of GetEquationIndex
   2159 *
   2160 *   @return
   2161 *       ADDR_E_RETURNCODE
   2162 ************************************************************************************************************************
   2163 */
   2164 UINT_32 Gfx9Lib::HwlGetEquationIndex(
   2165     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
   2166     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
   2167     ) const
   2168 {
   2169     AddrResourceType rsrcType         = pIn->resourceType;
   2170     AddrSwizzleMode  swMode           = pIn->swizzleMode;
   2171     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
   2172     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
   2173 
   2174     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
   2175     {
   2176         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
   2177         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
   2178 
   2179         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
   2180     }
   2181 
   2182     if (pOut->pMipInfo != NULL)
   2183     {
   2184         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
   2185         {
   2186             pOut->pMipInfo[i].equationIndex = index;
   2187         }
   2188     }
   2189 
   2190     return index;
   2191 }
   2192 
   2193 /**
   2194 ************************************************************************************************************************
   2195 *   Gfx9Lib::HwlComputeBlock256Equation
   2196 *
   2197 *   @brief
   2198 *       Interface function stub of ComputeBlock256Equation
   2199 *
   2200 *   @return
   2201 *       ADDR_E_RETURNCODE
   2202 ************************************************************************************************************************
   2203 */
   2204 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
   2205     AddrResourceType rsrcType,
   2206     AddrSwizzleMode  swMode,
   2207     UINT_32          elementBytesLog2,
   2208     ADDR_EQUATION*   pEquation) const
   2209 {
   2210     ADDR_E_RETURNCODE ret = ADDR_OK;
   2211 
   2212     pEquation->numBits = 8;
   2213 
   2214     UINT_32 i = 0;
   2215     for (; i < elementBytesLog2; i++)
   2216     {
   2217         InitChannel(1, 0 , i, &pEquation->addr[i]);
   2218     }
   2219 
   2220     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
   2221 
   2222     const UINT_32 maxBitsUsed = 4;
   2223     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
   2224     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
   2225 
   2226     for (i = 0; i < maxBitsUsed; i++)
   2227     {
   2228         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
   2229         InitChannel(1, 1, i, &y[i]);
   2230     }
   2231 
   2232     if (IsStandardSwizzle(rsrcType, swMode))
   2233     {
   2234         switch (elementBytesLog2)
   2235         {
   2236             case 0:
   2237                 pixelBit[0] = x[0];
   2238                 pixelBit[1] = x[1];
   2239                 pixelBit[2] = x[2];
   2240                 pixelBit[3] = x[3];
   2241                 pixelBit[4] = y[0];
   2242                 pixelBit[5] = y[1];
   2243                 pixelBit[6] = y[2];
   2244                 pixelBit[7] = y[3];
   2245                 break;
   2246             case 1:
   2247                 pixelBit[0] = x[0];
   2248                 pixelBit[1] = x[1];
   2249                 pixelBit[2] = x[2];
   2250                 pixelBit[3] = y[0];
   2251                 pixelBit[4] = y[1];
   2252                 pixelBit[5] = y[2];
   2253                 pixelBit[6] = x[3];
   2254                 break;
   2255             case 2:
   2256                 pixelBit[0] = x[0];
   2257                 pixelBit[1] = x[1];
   2258                 pixelBit[2] = y[0];
   2259                 pixelBit[3] = y[1];
   2260                 pixelBit[4] = y[2];
   2261                 pixelBit[5] = x[2];
   2262                 break;
   2263             case 3:
   2264                 pixelBit[0] = x[0];
   2265                 pixelBit[1] = y[0];
   2266                 pixelBit[2] = y[1];
   2267                 pixelBit[3] = x[1];
   2268                 pixelBit[4] = x[2];
   2269                 break;
   2270             case 4:
   2271                 pixelBit[0] = y[0];
   2272                 pixelBit[1] = y[1];
   2273                 pixelBit[2] = x[0];
   2274                 pixelBit[3] = x[1];
   2275                 break;
   2276             default:
   2277                 ADDR_ASSERT_ALWAYS();
   2278                 ret = ADDR_INVALIDPARAMS;
   2279                 break;
   2280         }
   2281     }
   2282     else if (IsDisplaySwizzle(rsrcType, swMode))
   2283     {
   2284         switch (elementBytesLog2)
   2285         {
   2286             case 0:
   2287                 pixelBit[0] = x[0];
   2288                 pixelBit[1] = x[1];
   2289                 pixelBit[2] = x[2];
   2290                 pixelBit[3] = y[1];
   2291                 pixelBit[4] = y[0];
   2292                 pixelBit[5] = y[2];
   2293                 pixelBit[6] = x[3];
   2294                 pixelBit[7] = y[3];
   2295                 break;
   2296             case 1:
   2297                 pixelBit[0] = x[0];
   2298                 pixelBit[1] = x[1];
   2299                 pixelBit[2] = x[2];
   2300                 pixelBit[3] = y[0];
   2301                 pixelBit[4] = y[1];
   2302                 pixelBit[5] = y[2];
   2303                 pixelBit[6] = x[3];
   2304                 break;
   2305             case 2:
   2306                 pixelBit[0] = x[0];
   2307                 pixelBit[1] = x[1];
   2308                 pixelBit[2] = y[0];
   2309                 pixelBit[3] = x[2];
   2310                 pixelBit[4] = y[1];
   2311                 pixelBit[5] = y[2];
   2312                 break;
   2313             case 3:
   2314                 pixelBit[0] = x[0];
   2315                 pixelBit[1] = y[0];
   2316                 pixelBit[2] = x[1];
   2317                 pixelBit[3] = x[2];
   2318                 pixelBit[4] = y[1];
   2319                 break;
   2320             case 4:
   2321                 pixelBit[0] = x[0];
   2322                 pixelBit[1] = y[0];
   2323                 pixelBit[2] = x[1];
   2324                 pixelBit[3] = y[1];
   2325                 break;
   2326             default:
   2327                 ADDR_ASSERT_ALWAYS();
   2328                 ret = ADDR_INVALIDPARAMS;
   2329                 break;
   2330         }
   2331     }
   2332     else if (IsRotateSwizzle(swMode))
   2333     {
   2334         switch (elementBytesLog2)
   2335         {
   2336             case 0:
   2337                 pixelBit[0] = y[0];
   2338                 pixelBit[1] = y[1];
   2339                 pixelBit[2] = y[2];
   2340                 pixelBit[3] = x[1];
   2341                 pixelBit[4] = x[0];
   2342                 pixelBit[5] = x[2];
   2343                 pixelBit[6] = x[3];
   2344                 pixelBit[7] = y[3];
   2345                 break;
   2346             case 1:
   2347                 pixelBit[0] = y[0];
   2348                 pixelBit[1] = y[1];
   2349                 pixelBit[2] = y[2];
   2350                 pixelBit[3] = x[0];
   2351                 pixelBit[4] = x[1];
   2352                 pixelBit[5] = x[2];
   2353                 pixelBit[6] = x[3];
   2354                 break;
   2355             case 2:
   2356                 pixelBit[0] = y[0];
   2357                 pixelBit[1] = y[1];
   2358                 pixelBit[2] = x[0];
   2359                 pixelBit[3] = y[2];
   2360                 pixelBit[4] = x[1];
   2361                 pixelBit[5] = x[2];
   2362                 break;
   2363             case 3:
   2364                 pixelBit[0] = y[0];
   2365                 pixelBit[1] = x[0];
   2366                 pixelBit[2] = y[1];
   2367                 pixelBit[3] = x[1];
   2368                 pixelBit[4] = x[2];
   2369                 break;
   2370             default:
   2371                 ADDR_ASSERT_ALWAYS();
   2372             case 4:
   2373                 ret = ADDR_INVALIDPARAMS;
   2374                 break;
   2375         }
   2376     }
   2377     else
   2378     {
   2379         ADDR_ASSERT_ALWAYS();
   2380         ret = ADDR_INVALIDPARAMS;
   2381     }
   2382 
   2383     // Post validation
   2384     if (ret == ADDR_OK)
   2385     {
   2386         Dim2d microBlockDim = Block256_2d[elementBytesLog2];
   2387         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
   2388                     (microBlockDim.w * (1 << elementBytesLog2)));
   2389         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
   2390     }
   2391 
   2392     return ret;
   2393 }
   2394 
   2395 /**
   2396 ************************************************************************************************************************
   2397 *   Gfx9Lib::HwlComputeThinEquation
   2398 *
   2399 *   @brief
   2400 *       Interface function stub of ComputeThinEquation
   2401 *
   2402 *   @return
   2403 *       ADDR_E_RETURNCODE
   2404 ************************************************************************************************************************
   2405 */
   2406 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
   2407     AddrResourceType rsrcType,
   2408     AddrSwizzleMode  swMode,
   2409     UINT_32          elementBytesLog2,
   2410     ADDR_EQUATION*   pEquation) const
   2411 {
   2412     ADDR_E_RETURNCODE ret = ADDR_OK;
   2413 
   2414     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
   2415 
   2416     UINT_32 maxXorBits = blockSizeLog2;
   2417     if (IsNonPrtXor(swMode))
   2418     {
   2419         // For non-prt-xor, maybe need to initialize some more bits for xor
   2420         // The highest xor bit used in equation will be max the following 3 items:
   2421         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
   2422         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
   2423         // 3. blockSizeLog2
   2424 
   2425         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
   2426         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
   2427                                      GetPipeXorBits(blockSizeLog2) +
   2428                                      2 * GetBankXorBits(blockSizeLog2));
   2429     }
   2430 
   2431     const UINT_32 maxBitsUsed = 14;
   2432     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
   2433     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
   2434     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
   2435 
   2436     const UINT_32 extraXorBits = 16;
   2437     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
   2438     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
   2439 
   2440     for (UINT_32 i = 0; i < maxBitsUsed; i++)
   2441     {
   2442         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
   2443         InitChannel(1, 1, i, &y[i]);
   2444     }
   2445 
   2446     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
   2447 
   2448     for (UINT_32 i = 0; i < elementBytesLog2; i++)
   2449     {
   2450         InitChannel(1, 0 , i, &pixelBit[i]);
   2451     }
   2452 
   2453     UINT_32 xIdx = 0;
   2454     UINT_32 yIdx = 0;
   2455     UINT_32 lowBits = 0;
   2456 
   2457     if (IsZOrderSwizzle(swMode))
   2458     {
   2459         if (elementBytesLog2 <= 3)
   2460         {
   2461             for (UINT_32 i = elementBytesLog2; i < 6; i++)
   2462             {
   2463                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
   2464             }
   2465 
   2466             lowBits = 6;
   2467         }
   2468         else
   2469         {
   2470             ret = ADDR_INVALIDPARAMS;
   2471         }
   2472     }
   2473     else
   2474     {
   2475         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
   2476 
   2477         if (ret == ADDR_OK)
   2478         {
   2479             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
   2480             xIdx = Log2(microBlockDim.w);
   2481             yIdx = Log2(microBlockDim.h);
   2482             lowBits = 8;
   2483         }
   2484     }
   2485 
   2486     if (ret == ADDR_OK)
   2487     {
   2488         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
   2489         {
   2490             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
   2491         }
   2492 
   2493         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
   2494         {
   2495             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
   2496         }
   2497 
   2498         if (IsXor(swMode))
   2499         {
   2500             // Fill XOR bits
   2501             UINT_32 pipeStart = m_pipeInterleaveLog2;
   2502             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
   2503 
   2504             UINT_32 bankStart = pipeStart + pipeXorBits;
   2505             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
   2506 
   2507             for (UINT_32 i = 0; i < pipeXorBits; i++)
   2508             {
   2509                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
   2510                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
   2511                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
   2512 
   2513                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
   2514             }
   2515 
   2516             for (UINT_32 i = 0; i < bankXorBits; i++)
   2517             {
   2518                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
   2519                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
   2520                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
   2521 
   2522                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
   2523             }
   2524 
   2525             if (IsPrt(swMode) == FALSE)
   2526             {
   2527                 for (UINT_32 i = 0; i < pipeXorBits; i++)
   2528                 {
   2529                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
   2530                 }
   2531 
   2532                 for (UINT_32 i = 0; i < bankXorBits; i++)
   2533                 {
   2534                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
   2535                 }
   2536             }
   2537         }
   2538 
   2539         pEquation->numBits = blockSizeLog2;
   2540     }
   2541 
   2542     return ret;
   2543 }
   2544 
   2545 /**
   2546 ************************************************************************************************************************
   2547 *   Gfx9Lib::HwlComputeThickEquation
   2548 *
   2549 *   @brief
   2550 *       Interface function stub of ComputeThickEquation
   2551 *
   2552 *   @return
   2553 *       ADDR_E_RETURNCODE
   2554 ************************************************************************************************************************
   2555 */
   2556 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
   2557     AddrResourceType rsrcType,
   2558     AddrSwizzleMode  swMode,
   2559     UINT_32          elementBytesLog2,
   2560     ADDR_EQUATION*   pEquation) const
   2561 {
   2562     ADDR_E_RETURNCODE ret = ADDR_OK;
   2563 
   2564     ADDR_ASSERT(IsTex3d(rsrcType));
   2565 
   2566     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
   2567 
   2568     UINT_32 maxXorBits = blockSizeLog2;
   2569     if (IsNonPrtXor(swMode))
   2570     {
   2571         // For non-prt-xor, maybe need to initialize some more bits for xor
   2572         // The highest xor bit used in equation will be max the following 3:
   2573         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
   2574         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
   2575         // 3. blockSizeLog2
   2576 
   2577         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
   2578         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
   2579                                      GetPipeXorBits(blockSizeLog2) +
   2580                                      3 * GetBankXorBits(blockSizeLog2));
   2581     }
   2582 
   2583     for (UINT_32 i = 0; i < elementBytesLog2; i++)
   2584     {
   2585         InitChannel(1, 0 , i, &pEquation->addr[i]);
   2586     }
   2587 
   2588     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
   2589 
   2590     const UINT_32 maxBitsUsed = 12;
   2591     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
   2592     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
   2593     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
   2594     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
   2595 
   2596     const UINT_32 extraXorBits = 24;
   2597     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
   2598     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
   2599 
   2600     for (UINT_32 i = 0; i < maxBitsUsed; i++)
   2601     {
   2602         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
   2603         InitChannel(1, 1, i, &y[i]);
   2604         InitChannel(1, 2, i, &z[i]);
   2605     }
   2606 
   2607     if (IsZOrderSwizzle(swMode))
   2608     {
   2609         switch (elementBytesLog2)
   2610         {
   2611             case 0:
   2612                 pixelBit[0]  = x[0];
   2613                 pixelBit[1]  = y[0];
   2614                 pixelBit[2]  = x[1];
   2615                 pixelBit[3]  = y[1];
   2616                 pixelBit[4]  = z[0];
   2617                 pixelBit[5]  = z[1];
   2618                 pixelBit[6]  = x[2];
   2619                 pixelBit[7]  = z[2];
   2620                 pixelBit[8]  = y[2];
   2621                 pixelBit[9]  = x[3];
   2622                 break;
   2623             case 1:
   2624                 pixelBit[0]  = x[0];
   2625                 pixelBit[1]  = y[0];
   2626                 pixelBit[2]  = x[1];
   2627                 pixelBit[3]  = y[1];
   2628                 pixelBit[4]  = z[0];
   2629                 pixelBit[5]  = z[1];
   2630                 pixelBit[6]  = z[2];
   2631                 pixelBit[7]  = y[2];
   2632                 pixelBit[8]  = x[2];
   2633                 break;
   2634             case 2:
   2635                 pixelBit[0]  = x[0];
   2636                 pixelBit[1]  = y[0];
   2637                 pixelBit[2]  = x[1];
   2638                 pixelBit[3]  = z[0];
   2639                 pixelBit[4]  = y[1];
   2640                 pixelBit[5]  = z[1];
   2641                 pixelBit[6]  = y[2];
   2642                 pixelBit[7]  = x[2];
   2643                 break;
   2644             case 3:
   2645                 pixelBit[0]  = x[0];
   2646                 pixelBit[1]  = y[0];
   2647                 pixelBit[2]  = z[0];
   2648                 pixelBit[3]  = x[1];
   2649                 pixelBit[4]  = z[1];
   2650                 pixelBit[5]  = y[1];
   2651                 pixelBit[6]  = x[2];
   2652                 break;
   2653             case 4:
   2654                 pixelBit[0]  = x[0];
   2655                 pixelBit[1]  = y[0];
   2656                 pixelBit[2]  = z[0];
   2657                 pixelBit[3]  = z[1];
   2658                 pixelBit[4]  = y[1];
   2659                 pixelBit[5]  = x[1];
   2660                 break;
   2661             default:
   2662                 ADDR_ASSERT_ALWAYS();
   2663                 ret = ADDR_INVALIDPARAMS;
   2664                 break;
   2665         }
   2666     }
   2667     else if (IsStandardSwizzle(rsrcType, swMode))
   2668     {
   2669         switch (elementBytesLog2)
   2670         {
   2671             case 0:
   2672                 pixelBit[0]  = x[0];
   2673                 pixelBit[1]  = x[1];
   2674                 pixelBit[2]  = x[2];
   2675                 pixelBit[3]  = x[3];
   2676                 pixelBit[4]  = y[0];
   2677                 pixelBit[5]  = y[1];
   2678                 pixelBit[6]  = z[0];
   2679                 pixelBit[7]  = z[1];
   2680                 pixelBit[8]  = z[2];
   2681                 pixelBit[9]  = y[2];
   2682                 break;
   2683             case 1:
   2684                 pixelBit[0]  = x[0];
   2685                 pixelBit[1]  = x[1];
   2686                 pixelBit[2]  = x[2];
   2687                 pixelBit[3]  = y[0];
   2688                 pixelBit[4]  = y[1];
   2689                 pixelBit[5]  = z[0];
   2690                 pixelBit[6]  = z[1];
   2691                 pixelBit[7]  = z[2];
   2692                 pixelBit[8]  = y[2];
   2693                 break;
   2694             case 2:
   2695                 pixelBit[0]  = x[0];
   2696                 pixelBit[1]  = x[1];
   2697                 pixelBit[2]  = y[0];
   2698                 pixelBit[3]  = y[1];
   2699                 pixelBit[4]  = z[0];
   2700                 pixelBit[5]  = z[1];
   2701                 pixelBit[6]  = y[2];
   2702                 pixelBit[7]  = x[2];
   2703                 break;
   2704             case 3:
   2705                 pixelBit[0]  = x[0];
   2706                 pixelBit[1]  = y[0];
   2707                 pixelBit[2]  = y[1];
   2708                 pixelBit[3]  = z[0];
   2709                 pixelBit[4]  = z[1];
   2710                 pixelBit[5]  = x[1];
   2711                 pixelBit[6]  = x[2];
   2712                 break;
   2713             case 4:
   2714                 pixelBit[0]  = y[0];
   2715                 pixelBit[1]  = y[1];
   2716                 pixelBit[2]  = z[0];
   2717                 pixelBit[3]  = z[1];
   2718                 pixelBit[4]  = x[0];
   2719                 pixelBit[5]  = x[1];
   2720                 break;
   2721             default:
   2722                 ADDR_ASSERT_ALWAYS();
   2723                 ret = ADDR_INVALIDPARAMS;
   2724                 break;
   2725         }
   2726     }
   2727     else
   2728     {
   2729         ADDR_ASSERT_ALWAYS();
   2730         ret = ADDR_INVALIDPARAMS;
   2731     }
   2732 
   2733     if (ret == ADDR_OK)
   2734     {
   2735         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
   2736         UINT_32 xIdx = Log2(microBlockDim.w);
   2737         UINT_32 yIdx = Log2(microBlockDim.h);
   2738         UINT_32 zIdx = Log2(microBlockDim.d);
   2739 
   2740         pixelBit = pEquation->addr;
   2741 
   2742         const UINT_32 lowBits = 10;
   2743         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
   2744         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
   2745 
   2746         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
   2747         {
   2748             if ((i % 3) == 0)
   2749             {
   2750                 pixelBit[i] = x[xIdx++];
   2751             }
   2752             else if ((i % 3) == 1)
   2753             {
   2754                 pixelBit[i] = z[zIdx++];
   2755             }
   2756             else
   2757             {
   2758                 pixelBit[i] = y[yIdx++];
   2759             }
   2760         }
   2761 
   2762         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
   2763         {
   2764             if ((i % 3) == 0)
   2765             {
   2766                 xorExtra[i - blockSizeLog2] = x[xIdx++];
   2767             }
   2768             else if ((i % 3) == 1)
   2769             {
   2770                 xorExtra[i - blockSizeLog2] = z[zIdx++];
   2771             }
   2772             else
   2773             {
   2774                 xorExtra[i - blockSizeLog2] = y[yIdx++];
   2775             }
   2776         }
   2777 
   2778         if (IsXor(swMode))
   2779         {
   2780             // Fill XOR bits
   2781             UINT_32 pipeStart = m_pipeInterleaveLog2;
   2782             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
   2783             for (UINT_32 i = 0; i < pipeXorBits; i++)
   2784             {
   2785                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
   2786                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
   2787                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
   2788 
   2789                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
   2790 
   2791                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
   2792                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
   2793                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
   2794 
   2795                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
   2796             }
   2797 
   2798             UINT_32 bankStart = pipeStart + pipeXorBits;
   2799             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
   2800             for (UINT_32 i = 0; i < bankXorBits; i++)
   2801             {
   2802                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
   2803                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
   2804                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
   2805 
   2806                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
   2807 
   2808                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
   2809                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
   2810                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
   2811 
   2812                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
   2813             }
   2814         }
   2815 
   2816         pEquation->numBits = blockSizeLog2;
   2817     }
   2818 
   2819     return ret;
   2820 }
   2821 
   2822 /**
   2823 ************************************************************************************************************************
   2824 *   Gfx9Lib::IsValidDisplaySwizzleMode
   2825 *
   2826 *   @brief
   2827 *       Check if a swizzle mode is supported by display engine
   2828 *
   2829 *   @return
   2830 *       TRUE is swizzle mode is supported by display engine
   2831 ************************************************************************************************************************
   2832 */
   2833 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
   2834     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
   2835 {
   2836     BOOL_32 support = FALSE;
   2837 
   2838     const AddrResourceType resourceType = pIn->resourceType;
   2839     (void)resourceType;
   2840     const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
   2841 
   2842     if (m_settings.isDce12)
   2843     {
   2844         switch (swizzleMode)
   2845         {
   2846             case ADDR_SW_256B_D:
   2847             case ADDR_SW_256B_R:
   2848                 support = (pIn->bpp == 32);
   2849                 break;
   2850 
   2851             case ADDR_SW_LINEAR:
   2852             case ADDR_SW_4KB_D:
   2853             case ADDR_SW_4KB_R:
   2854             case ADDR_SW_64KB_D:
   2855             case ADDR_SW_64KB_R:
   2856             case ADDR_SW_VAR_D:
   2857             case ADDR_SW_VAR_R:
   2858             case ADDR_SW_4KB_D_X:
   2859             case ADDR_SW_4KB_R_X:
   2860             case ADDR_SW_64KB_D_X:
   2861             case ADDR_SW_64KB_R_X:
   2862             case ADDR_SW_VAR_D_X:
   2863             case ADDR_SW_VAR_R_X:
   2864                 support = (pIn->bpp <= 64);
   2865                 break;
   2866 
   2867             default:
   2868                 break;
   2869         }
   2870     }
   2871     else if (m_settings.isDcn1)
   2872     {
   2873         switch (swizzleMode)
   2874         {
   2875             case ADDR_SW_4KB_D:
   2876             case ADDR_SW_64KB_D:
   2877             case ADDR_SW_VAR_D:
   2878             case ADDR_SW_64KB_D_T:
   2879             case ADDR_SW_4KB_D_X:
   2880             case ADDR_SW_64KB_D_X:
   2881             case ADDR_SW_VAR_D_X:
   2882                 support = (pIn->bpp == 64);
   2883                 break;
   2884 
   2885             case ADDR_SW_LINEAR:
   2886             case ADDR_SW_4KB_S:
   2887             case ADDR_SW_64KB_S:
   2888             case ADDR_SW_VAR_S:
   2889             case ADDR_SW_64KB_S_T:
   2890             case ADDR_SW_4KB_S_X:
   2891             case ADDR_SW_64KB_S_X:
   2892             case ADDR_SW_VAR_S_X:
   2893                 support = (pIn->bpp <= 64);
   2894                 break;
   2895 
   2896             default:
   2897                 break;
   2898         }
   2899     }
   2900     else
   2901     {
   2902         ADDR_NOT_IMPLEMENTED();
   2903     }
   2904 
   2905     return support;
   2906 }
   2907 
   2908 /**
   2909 ************************************************************************************************************************
   2910 *   Gfx9Lib::HwlComputePipeBankXor
   2911 *
   2912 *   @brief
   2913 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
   2914 *
   2915 *   @return
   2916 *       PipeBankXor value
   2917 ************************************************************************************************************************
   2918 */
   2919 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
   2920     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
   2921     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
   2922 {
   2923     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
   2924     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
   2925     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
   2926 
   2927     UINT_32 pipeXor = 0;
   2928     UINT_32 bankXor = 0;
   2929 
   2930     const UINT_32 bankMask = (1 << bankBits) - 1;
   2931     const UINT_32 index    = pIn->surfIndex & bankMask;
   2932 
   2933     const UINT_32 bpp      = pIn->flags.fmask ?
   2934                              GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
   2935     if (bankBits == 4)
   2936     {
   2937         static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
   2938         static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
   2939 
   2940         bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
   2941     }
   2942     else if (bankBits > 0)
   2943     {
   2944         UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
   2945         bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
   2946         bankXor = (index * bankIncrease) & bankMask;
   2947     }
   2948 
   2949     pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
   2950 
   2951     return ADDR_OK;
   2952 }
   2953 
   2954 /**
   2955 ************************************************************************************************************************
   2956 *   Gfx9Lib::HwlComputeSlicePipeBankXor
   2957 *
   2958 *   @brief
   2959 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
   2960 *
   2961 *   @return
   2962 *       PipeBankXor value
   2963 ************************************************************************************************************************
   2964 */
   2965 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
   2966     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
   2967     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
   2968 {
   2969     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
   2970     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
   2971     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
   2972 
   2973     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
   2974     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
   2975 
   2976     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
   2977 
   2978     return ADDR_OK;
   2979 }
   2980 
   2981 /**
   2982 ************************************************************************************************************************
   2983 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
   2984 *
   2985 *   @brief
   2986 *       Compute sub resource offset to support swizzle pattern
   2987 *
   2988 *   @return
   2989 *       Offset
   2990 ************************************************************************************************************************
   2991 */
   2992 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
   2993     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
   2994     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
   2995 {
   2996     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
   2997 
   2998     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
   2999     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
   3000     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
   3001     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
   3002     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
   3003     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
   3004 
   3005     pOut->offset = pIn->slice * pIn->sliceSize +
   3006                    pIn->macroBlockOffset +
   3007                    (pIn->mipTailOffset ^ pipeBankXor) -
   3008                    static_cast<UINT_64>(pipeBankXor);
   3009     return ADDR_OK;
   3010 }
   3011 
   3012 /**
   3013 ************************************************************************************************************************
   3014 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
   3015 *
   3016 *   @brief
   3017 *       Compute surface info sanity check
   3018 *
   3019 *   @return
   3020 *       Offset
   3021 ************************************************************************************************************************
   3022 */
   3023 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
   3024     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
   3025 {
   3026     BOOL_32 invalid = FALSE;
   3027 
   3028     if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
   3029     {
   3030         invalid = TRUE;
   3031     }
   3032     else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE)    ||
   3033              (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
   3034     {
   3035         invalid = TRUE;
   3036     }
   3037 
   3038     BOOL_32 mipmap = (pIn->numMipLevels > 1);
   3039     BOOL_32 msaa   = (pIn->numFrags > 1);
   3040 
   3041     ADDR2_SURFACE_FLAGS flags = pIn->flags;
   3042     BOOL_32 zbuffer = (flags.depth || flags.stencil);
   3043     BOOL_32 color   = flags.color;
   3044     BOOL_32 display = flags.display || flags.rotated;
   3045 
   3046     AddrResourceType rsrcType    = pIn->resourceType;
   3047     BOOL_32          tex3d       = IsTex3d(rsrcType);
   3048     AddrSwizzleMode  swizzle     = pIn->swizzleMode;
   3049     BOOL_32          linear      = IsLinear(swizzle);
   3050     BOOL_32          blk256B     = IsBlock256b(swizzle);
   3051     BOOL_32          blkVar      = IsBlockVariable(swizzle);
   3052     BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
   3053     BOOL_32          prt         = flags.prt;
   3054     BOOL_32          stereo      = flags.qbStereo;
   3055 
   3056     if (invalid == FALSE)
   3057     {
   3058         if ((pIn->numFrags > 1) &&
   3059             (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
   3060         {
   3061             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
   3062             invalid = TRUE;
   3063         }
   3064     }
   3065 
   3066     if (invalid == FALSE)
   3067     {
   3068         switch (rsrcType)
   3069         {
   3070             case ADDR_RSRC_TEX_1D:
   3071                 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
   3072                 break;
   3073             case ADDR_RSRC_TEX_2D:
   3074                 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
   3075                 break;
   3076             case ADDR_RSRC_TEX_3D:
   3077                 invalid = msaa || zbuffer || display || stereo;
   3078                 break;
   3079             default:
   3080                 invalid = TRUE;
   3081                 break;
   3082         }
   3083     }
   3084 
   3085     if (invalid == FALSE)
   3086     {
   3087         if (display)
   3088         {
   3089             invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
   3090         }
   3091     }
   3092 
   3093     if (invalid == FALSE)
   3094     {
   3095         if (linear)
   3096         {
   3097             invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
   3098                       zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
   3099         }
   3100         else
   3101         {
   3102             if (blk256B || blkVar || isNonPrtXor)
   3103             {
   3104                 invalid = prt;
   3105                 if (blk256B)
   3106                 {
   3107                     invalid = invalid || zbuffer || tex3d || mipmap || msaa;
   3108                 }
   3109             }
   3110 
   3111             if (invalid == FALSE)
   3112             {
   3113                 if (IsZOrderSwizzle(swizzle))
   3114                 {
   3115                     invalid = color && msaa;
   3116                 }
   3117                 else if (IsStandardSwizzle(rsrcType, swizzle))
   3118                 {
   3119                     invalid = zbuffer;
   3120                 }
   3121                 else if (IsDisplaySwizzle(rsrcType, swizzle))
   3122                 {
   3123                     invalid = zbuffer;
   3124                 }
   3125                 else if (IsRotateSwizzle(swizzle))
   3126                 {
   3127                     invalid = zbuffer || (pIn->bpp > 64) || tex3d;
   3128                 }
   3129                 else
   3130                 {
   3131                     ADDR_ASSERT(!"invalid swizzle mode");
   3132                     invalid = TRUE;
   3133                 }
   3134             }
   3135         }
   3136     }
   3137 
   3138     ADDR_ASSERT(invalid == FALSE);
   3139 
   3140     return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
   3141 }
   3142 
   3143 /**
   3144 ************************************************************************************************************************
   3145 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
   3146 *
   3147 *   @brief
   3148 *       Internal function to get suggested surface information for client to use
   3149 *
   3150 *   @return
   3151 *       ADDR_E_RETURNCODE
   3152 ************************************************************************************************************************
   3153 */
   3154 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
   3155     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
   3156     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
   3157 {
   3158     // Macro define resource block type
   3159     enum AddrBlockType
   3160     {
   3161         AddrBlockMicro     = 0, // Resource uses 256B block
   3162         AddrBlock4KB       = 1, // Resource uses 4KB block
   3163         AddrBlock64KB      = 2, // Resource uses 64KB block
   3164         AddrBlockVar       = 3, // Resource uses var blcok
   3165         AddrBlockLinear    = 4, // Resource uses linear swizzle mode
   3166 
   3167         AddrBlockMaxTiledType = AddrBlock64KB + 1,
   3168     };
   3169 
   3170     enum AddrBlockSet
   3171     {
   3172         AddrBlockSetMicro     = 1 << AddrBlockMicro,
   3173         AddrBlockSetMacro4KB  = 1 << AddrBlock4KB,
   3174         AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
   3175         AddrBlockSetVar       = 1 << AddrBlockVar,
   3176         AddrBlockSetLinear    = 1 << AddrBlockLinear,
   3177 
   3178         AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
   3179     };
   3180 
   3181     enum AddrSwSet
   3182     {
   3183         AddrSwSetZ = 1 << ADDR_SW_Z,
   3184         AddrSwSetS = 1 << ADDR_SW_S,
   3185         AddrSwSetD = 1 << ADDR_SW_D,
   3186         AddrSwSetR = 1 << ADDR_SW_R,
   3187 
   3188         AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
   3189     };
   3190 
   3191     ADDR_E_RETURNCODE returnCode = ADDR_OK;
   3192     ElemLib*          pElemLib   = GetElemLib();
   3193 
   3194     // Set format to INVALID will skip this conversion
   3195     UINT_32 expandX = 1;
   3196     UINT_32 expandY = 1;
   3197     UINT_32 bpp     = pIn->bpp;
   3198     UINT_32 width   = pIn->width;
   3199     UINT_32 height  = pIn->height;
   3200 
   3201     if (pIn->format != ADDR_FMT_INVALID)
   3202     {
   3203         // Don't care for this case
   3204         ElemMode elemMode = ADDR_UNCOMPRESSED;
   3205 
   3206         // Get compression/expansion factors and element mode which indicates compression/expansion
   3207         bpp = pElemLib->GetBitsPerPixel(pIn->format,
   3208                                         &elemMode,
   3209                                         &expandX,
   3210                                         &expandY);
   3211 
   3212         UINT_32 basePitch = 0;
   3213         GetElemLib()->AdjustSurfaceInfo(elemMode,
   3214                                         expandX,
   3215                                         expandY,
   3216                                         &bpp,
   3217                                         &basePitch,
   3218                                         &width,
   3219                                         &height);
   3220     }
   3221 
   3222     UINT_32 numSamples   = Max(pIn->numSamples, 1u);
   3223     UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
   3224     UINT_32 slice        = Max(pIn->numSlices, 1u);
   3225     UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
   3226     UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign);
   3227 
   3228     if (pIn->flags.fmask)
   3229     {
   3230         bpp        = GetFmaskBpp(numSamples, numFrags);
   3231         numFrags   = 1;
   3232         numSamples = 1;
   3233         pOut->resourceType = ADDR_RSRC_TEX_2D;
   3234     }
   3235     else
   3236     {
   3237         // The output may get changed for volume(3D) texture resource in future
   3238         pOut->resourceType = pIn->resourceType;
   3239     }
   3240 
   3241     if (bpp < 8)
   3242     {
   3243         ADDR_ASSERT_ALWAYS();
   3244 
   3245         returnCode = ADDR_INVALIDPARAMS;
   3246     }
   3247     else if (IsTex1d(pOut->resourceType))
   3248     {
   3249         pOut->swizzleMode         = ADDR_SW_LINEAR;
   3250         pOut->validBlockSet.value = AddrBlockSetLinear;
   3251         pOut->canXor              = FALSE;
   3252     }
   3253     else
   3254     {
   3255         ADDR2_BLOCK_SET blockSet;
   3256         blockSet.value = 0;
   3257 
   3258         ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet;
   3259         addrPreferredSwSet.value = AddrSwSetS;
   3260         addrValidSwSet           = addrPreferredSwSet;
   3261         clientPreferredSwSet     = pIn->preferredSwSet;
   3262 
   3263         if (clientPreferredSwSet.value == 0)
   3264         {
   3265             clientPreferredSwSet.value = AddrSwSetAll;
   3266         }
   3267 
   3268         // prt Xor and non-xor will have less height align requirement for stereo surface
   3269         BOOL_32 prtXor          = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
   3270         BOOL_32 displayResource = FALSE;
   3271 
   3272         pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE);
   3273 
   3274         // Filter out improper swType and blockSet by HW restriction
   3275         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
   3276         {
   3277             ADDR_ASSERT(IsTex2d(pOut->resourceType));
   3278             blockSet.value           = AddrBlockSetMacro;
   3279             addrPreferredSwSet.value = AddrSwSetZ;
   3280             addrValidSwSet.value     = AddrSwSetZ;
   3281 
   3282             if (pIn->flags.depth && pIn->flags.texture)
   3283             {
   3284                 if (((bpp == 16) && (numFrags >= 4)) ||
   3285                     ((bpp == 32) && (numFrags >= 2)))
   3286                 {
   3287                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
   3288                     // equation from wrong address within memory range a tile covered and use the
   3289                     // garbage data for compressed Z reading which finally leads to corruption.
   3290                     pOut->canXor = FALSE;
   3291                     prtXor       = FALSE;
   3292                 }
   3293             }
   3294         }
   3295         else if (ElemLib::IsBlockCompressed(pIn->format))
   3296         {
   3297             // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
   3298             // Not sure under what circumstances "_D" would be appropriate as these formats
   3299             // are not displayable.
   3300             blockSet.value = AddrBlockSetMacro;
   3301 
   3302             // This isn't to be used as texture and caller doesn't allow macro tiled.
   3303             if ((pIn->flags.texture == FALSE) &&
   3304                 (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB))
   3305             {
   3306                 blockSet.value |= AddrBlockSetLinear;
   3307             }
   3308 
   3309             addrPreferredSwSet.value = AddrSwSetD;
   3310             addrValidSwSet.value     = AddrSwSetS | AddrSwSetD;
   3311         }
   3312         else if (ElemLib::IsMacroPixelPacked(pIn->format))
   3313         {
   3314             // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes.
   3315             // Its notclear under what circumstances the D or R modes would be appropriate
   3316             // since these formats are not displayable.
   3317             blockSet.value  = AddrBlockSetLinear | AddrBlockSetMacro;
   3318 
   3319             addrPreferredSwSet.value = AddrSwSetS;
   3320             addrValidSwSet.value     = AddrSwSetS | AddrSwSetD | AddrSwSetR;
   3321         }
   3322         else if (IsTex3d(pOut->resourceType))
   3323         {
   3324             blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
   3325 
   3326             if (pIn->flags.prt)
   3327             {
   3328                 // PRT cannot use SW_D which gives an unexpected block dimension
   3329                 addrPreferredSwSet.value = AddrSwSetZ;
   3330                 addrValidSwSet.value     = AddrSwSetZ | AddrSwSetS;
   3331             }
   3332             else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
   3333             {
   3334                 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
   3335                 // or SW_*_Z modes if mipmapping is desired on a 3D surface
   3336                 addrPreferredSwSet.value = AddrSwSetZ;
   3337                 addrValidSwSet.value     = AddrSwSetZ | AddrSwSetS;
   3338             }
   3339             else if (pIn->flags.color)
   3340             {
   3341                 addrPreferredSwSet.value = AddrSwSetD;
   3342                 addrValidSwSet.value     = AddrSwSetZ | AddrSwSetS | AddrSwSetD;
   3343             }
   3344             else
   3345             {
   3346                 addrPreferredSwSet.value = AddrSwSetZ;
   3347                 addrValidSwSet.value     = AddrSwSetZ | AddrSwSetD;
   3348                 if (bpp != 128)
   3349                 {
   3350                     addrValidSwSet.value |= AddrSwSetS;
   3351                 }
   3352             }
   3353         }
   3354         else
   3355         {
   3356             addrPreferredSwSet.value = ((pIn->flags.display == TRUE) ||
   3357                                         (pIn->flags.overlay == TRUE) ||
   3358                                         (pIn->bpp           == 128)) ? AddrSwSetD : AddrSwSetS;
   3359 
   3360             addrValidSwSet.value     = AddrSwSetS | AddrSwSetD | AddrSwSetR;
   3361 
   3362             if (numMipLevels > 1)
   3363             {
   3364                 ADDR_ASSERT(numFrags == 1);
   3365                 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
   3366             }
   3367             else if ((numFrags > 1) || (numSamples > 1))
   3368             {
   3369                 ADDR_ASSERT(IsTex2d(pOut->resourceType));
   3370                 blockSet.value = AddrBlockSetMacro;
   3371             }
   3372             else
   3373             {
   3374                 ADDR_ASSERT(IsTex2d(pOut->resourceType));
   3375                 blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro;
   3376 
   3377                 displayResource = pIn->flags.rotated || pIn->flags.display;
   3378 
   3379                 if (displayResource)
   3380                 {
   3381                     addrPreferredSwSet.value = pIn->flags.rotated ? AddrSwSetR : AddrSwSetD;
   3382 
   3383                     if (pIn->bpp > 64)
   3384                     {
   3385                         blockSet.value = 0;
   3386                     }
   3387                     else if (m_settings.isDce12)
   3388                     {
   3389                         if (pIn->bpp != 32)
   3390                         {
   3391                             blockSet.micro = FALSE;
   3392                         }
   3393 
   3394                         // DCE12 does not support display surface to be _T swizzle mode
   3395                         prtXor = FALSE;
   3396 
   3397                         addrValidSwSet.value = AddrSwSetD | AddrSwSetR;
   3398                     }
   3399                     else if (m_settings.isDcn1)
   3400                     {
   3401                         // _R is not supported by Dcn1
   3402                         if (pIn->bpp == 64)
   3403                         {
   3404                             addrPreferredSwSet.value = AddrSwSetD;
   3405                             addrValidSwSet.value     = AddrSwSetD;
   3406                         }
   3407                         else
   3408                         {
   3409                             addrPreferredSwSet.value = AddrSwSetS;
   3410                             addrValidSwSet.value     = AddrSwSetS | AddrSwSetD;
   3411                         }
   3412 
   3413                         blockSet.micro = FALSE;
   3414                     }
   3415                     else
   3416                     {
   3417                         ADDR_NOT_IMPLEMENTED();
   3418                         returnCode = ADDR_NOTSUPPORTED;
   3419                     }
   3420                 }
   3421             }
   3422         }
   3423 
   3424         ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value);
   3425 
   3426         pOut->clientPreferredSwSet = clientPreferredSwSet;
   3427 
   3428         // Clamp client preferred set to valid set
   3429         clientPreferredSwSet.value &= addrValidSwSet.value;
   3430 
   3431         pOut->validSwTypeSet = addrValidSwSet;
   3432 
   3433         if (clientPreferredSwSet.value == 0)
   3434         {
   3435             // Client asks for an invalid swizzle type...
   3436             ADDR_ASSERT_ALWAYS();
   3437             returnCode = ADDR_INVALIDPARAMS;
   3438         }
   3439         else
   3440         {
   3441             if (IsPow2(clientPreferredSwSet.value))
   3442             {
   3443                 // Only one swizzle type left, use it directly
   3444                 addrPreferredSwSet.value = clientPreferredSwSet.value;
   3445             }
   3446             else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0)
   3447             {
   3448                 // Client wants 2 or more a valid swizzle type but none of them is addrlib preferred
   3449                 if (clientPreferredSwSet.sw_D)
   3450                 {
   3451                     addrPreferredSwSet.value = AddrSwSetD;
   3452                 }
   3453                 else if (clientPreferredSwSet.sw_Z)
   3454                 {
   3455                     addrPreferredSwSet.value = AddrSwSetZ;
   3456                 }
   3457                 else if (clientPreferredSwSet.sw_R)
   3458                 {
   3459                     addrPreferredSwSet.value = AddrSwSetR;
   3460                 }
   3461                 else
   3462                 {
   3463                     ADDR_ASSERT(clientPreferredSwSet.sw_S);
   3464                     addrPreferredSwSet.value = AddrSwSetS;
   3465                 }
   3466             }
   3467 
   3468             if ((numFrags > 1) &&
   3469                 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
   3470             {
   3471                 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
   3472                 blockSet.macro4KB = FALSE;
   3473             }
   3474 
   3475             if (pIn->flags.prt)
   3476             {
   3477                 blockSet.value &= AddrBlockSetMacro64KB;
   3478             }
   3479 
   3480             // Apply customized forbidden setting
   3481             blockSet.value &= ~pIn->forbiddenBlock.value;
   3482 
   3483             if (pIn->maxAlign > 0)
   3484             {
   3485                 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
   3486                 {
   3487                     blockSet.macro64KB = FALSE;
   3488                 }
   3489 
   3490                 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
   3491                 {
   3492                     blockSet.macro4KB = FALSE;
   3493                 }
   3494 
   3495                 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
   3496                 {
   3497                     blockSet.micro = FALSE;
   3498                 }
   3499             }
   3500 
   3501             Dim3d blkAlign[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
   3502             Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
   3503             UINT_64 padSize[AddrBlockMaxTiledType] = {0};
   3504 
   3505             if (blockSet.micro)
   3506             {
   3507                 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
   3508                                                           &blkAlign[AddrBlockMicro].h,
   3509                                                           &blkAlign[AddrBlockMicro].d,
   3510                                                           bpp,
   3511                                                           numFrags,
   3512                                                           pOut->resourceType,
   3513                                                           ADDR_SW_256B);
   3514 
   3515                 if (returnCode == ADDR_OK)
   3516                 {
   3517                     if (displayResource)
   3518                     {
   3519                         blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
   3520                     }
   3521                     else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
   3522                              (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
   3523                     {
   3524                         // If one 256B block can contain the surface, don't bother bigger block type
   3525                         blockSet.macro4KB = FALSE;
   3526                         blockSet.macro64KB = FALSE;
   3527                         blockSet.var = FALSE;
   3528                     }
   3529 
   3530                     padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
   3531                                                              slice, &paddedDim[AddrBlockMicro]);
   3532                 }
   3533             }
   3534 
   3535             if ((returnCode == ADDR_OK) && blockSet.macro4KB)
   3536             {
   3537                 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
   3538                                                           &blkAlign[AddrBlock4KB].h,
   3539                                                           &blkAlign[AddrBlock4KB].d,
   3540                                                           bpp,
   3541                                                           numFrags,
   3542                                                           pOut->resourceType,
   3543                                                           ADDR_SW_4KB);
   3544 
   3545                 if (returnCode == ADDR_OK)
   3546                 {
   3547                     if (displayResource)
   3548                     {
   3549                         blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
   3550                     }
   3551 
   3552                     padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
   3553                                                            slice, &paddedDim[AddrBlock4KB]);
   3554 
   3555                     ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
   3556                 }
   3557             }
   3558 
   3559             if ((returnCode == ADDR_OK) && blockSet.macro64KB)
   3560             {
   3561                 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
   3562                                                           &blkAlign[AddrBlock64KB].h,
   3563                                                           &blkAlign[AddrBlock64KB].d,
   3564                                                           bpp,
   3565                                                           numFrags,
   3566                                                           pOut->resourceType,
   3567                                                           ADDR_SW_64KB);
   3568 
   3569                 if (returnCode == ADDR_OK)
   3570                 {
   3571                     if (displayResource)
   3572                     {
   3573                         blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
   3574                     }
   3575 
   3576                     padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
   3577                                                             slice, &paddedDim[AddrBlock64KB]);
   3578 
   3579                     ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
   3580                     ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
   3581                 }
   3582             }
   3583 
   3584             if (returnCode == ADDR_OK)
   3585             {
   3586                 UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
   3587 
   3588                 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
   3589                 {
   3590                     padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
   3591                 }
   3592 
   3593                 // Use minimum block type which meets all conditions above if flag minimizeAlign was set
   3594                 if (pIn->flags.minimizeAlign)
   3595                 {
   3596                     // If padded size of 64KB block is larger than padded size of 256B block or 4KB
   3597                     // block, filter out 64KB block from candidate list
   3598                     if (blockSet.macro64KB &&
   3599                         ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
   3600                          (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
   3601                     {
   3602                         blockSet.macro64KB = FALSE;
   3603                     }
   3604 
   3605                     // If padded size of 4KB block is larger than padded size of 256B block,
   3606                     // filter out 4KB block from candidate list
   3607                     if (blockSet.macro4KB &&
   3608                         blockSet.micro &&
   3609                         (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
   3610                     {
   3611                         blockSet.macro4KB = FALSE;
   3612                     }
   3613                 }
   3614                 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
   3615                 else if (pIn->flags.opt4space)
   3616                 {
   3617                     UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
   3618                                         (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
   3619 
   3620                     threshold += threshold >> 1;
   3621 
   3622                     if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
   3623                     {
   3624                         blockSet.macro64KB = FALSE;
   3625                     }
   3626 
   3627                     if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
   3628                     {
   3629                         blockSet.macro4KB = FALSE;
   3630                     }
   3631                 }
   3632                 else
   3633                 {
   3634                     if (blockSet.macro64KB &&
   3635                         (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
   3636                         ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
   3637                     {
   3638                         // If 64KB block waste more than half memory on padding, filter it out from
   3639                         // candidate list when it is not the only choice left
   3640                         blockSet.macro64KB = FALSE;
   3641                     }
   3642                 }
   3643 
   3644                 if (blockSet.value == 0)
   3645                 {
   3646                     // Bad things happen, client will not get any useful information from AddrLib.
   3647                     // Maybe we should fill in some output earlier instead of outputing nothing?
   3648                     ADDR_ASSERT_ALWAYS();
   3649                     returnCode = ADDR_INVALIDPARAMS;
   3650                 }
   3651                 else
   3652                 {
   3653                     pOut->validBlockSet = blockSet;
   3654                     pOut->canXor = pOut->canXor &&
   3655                                    (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
   3656 
   3657                     if (blockSet.macro64KB || blockSet.macro4KB)
   3658                     {
   3659                         if (addrPreferredSwSet.value == AddrSwSetZ)
   3660                         {
   3661                             pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
   3662                         }
   3663                         else if (addrPreferredSwSet.value == AddrSwSetS)
   3664                         {
   3665                             pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
   3666                         }
   3667                         else if (addrPreferredSwSet.value == AddrSwSetD)
   3668                         {
   3669                             pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
   3670                         }
   3671                         else
   3672                         {
   3673                             ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
   3674                             pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
   3675                         }
   3676 
   3677                         if (prtXor && blockSet.macro64KB)
   3678                         {
   3679                             // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
   3680                             const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
   3681                             pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
   3682                         }
   3683                         else if (pOut->canXor)
   3684                         {
   3685                             // Client wants XOR and this is allowed, return XOR version swizzle mode
   3686                             const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
   3687                             pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
   3688                         }
   3689                     }
   3690                     else if (blockSet.micro)
   3691                     {
   3692                         if (addrPreferredSwSet.value == AddrSwSetS)
   3693                         {
   3694                             pOut->swizzleMode = ADDR_SW_256B_S;
   3695                         }
   3696                         else if (addrPreferredSwSet.value == AddrSwSetD)
   3697                         {
   3698                             pOut->swizzleMode = ADDR_SW_256B_D;
   3699                         }
   3700                         else
   3701                         {
   3702                             ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
   3703                             pOut->swizzleMode = ADDR_SW_256B_R;
   3704                         }
   3705                     }
   3706                     else if (blockSet.linear)
   3707                     {
   3708                         // Fall into this branch doesn't mean linear is suitable, only no other choices!
   3709                         pOut->swizzleMode = ADDR_SW_LINEAR;
   3710                     }
   3711                     else
   3712                     {
   3713                         ADDR_ASSERT(blockSet.var);
   3714 
   3715                         // Designer consider VAR swizzle mode is usless for most cases
   3716                         ADDR_UNHANDLED_CASE();
   3717 
   3718                         returnCode = ADDR_NOTSUPPORTED;
   3719                     }
   3720 
   3721 #if DEBUG
   3722                     // Post sanity check, at least AddrLib should accept the output generated by its own
   3723                     if (pOut->swizzleMode != ADDR_SW_LINEAR)
   3724                     {
   3725                         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
   3726                         localIn.flags = pIn->flags;
   3727                         localIn.swizzleMode = pOut->swizzleMode;
   3728                         localIn.resourceType = pOut->resourceType;
   3729                         localIn.format = pIn->format;
   3730                         localIn.bpp = bpp;
   3731                         localIn.width = width;
   3732                         localIn.height = height;
   3733                         localIn.numSlices = slice;
   3734                         localIn.numMipLevels = numMipLevels;
   3735                         localIn.numSamples = numSamples;
   3736                         localIn.numFrags = numFrags;
   3737 
   3738                         HwlComputeSurfaceInfoSanityCheck(&localIn);
   3739 
   3740                     }
   3741 #endif
   3742                 }
   3743             }
   3744         }
   3745     }
   3746 
   3747     return returnCode;
   3748 }
   3749 
   3750 /**
   3751 ************************************************************************************************************************
   3752 *   Gfx9Lib::ComputeStereoInfo
   3753 *
   3754 *   @brief
   3755 *       Compute height alignment and right eye pipeBankXor for stereo surface
   3756 *
   3757 *   @return
   3758 *       Error code
   3759 *
   3760 ************************************************************************************************************************
   3761 */
   3762 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
   3763     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
   3764     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
   3765     UINT_32*                                pHeightAlign
   3766     ) const
   3767 {
   3768     ADDR_E_RETURNCODE returnCode = ADDR_OK;
   3769 
   3770     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
   3771 
   3772     if (eqIndex < m_numEquations)
   3773     {
   3774         if (IsXor(pIn->swizzleMode))
   3775         {
   3776             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
   3777             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
   3778             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
   3779             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
   3780             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
   3781             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
   3782 
   3783             ADDR_ASSERT(maxYCoordBlock256 ==
   3784                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
   3785 
   3786             const UINT_32 maxYCoordInBaseEquation =
   3787                 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
   3788 
   3789             ADDR_ASSERT(maxYCoordInBaseEquation ==
   3790                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
   3791 
   3792             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
   3793 
   3794             ADDR_ASSERT(maxYCoordInPipeXor ==
   3795                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
   3796 
   3797             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
   3798                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
   3799 
   3800             ADDR_ASSERT(maxYCoordInBankXor ==
   3801                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
   3802 
   3803             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
   3804 
   3805             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
   3806             {
   3807                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
   3808 
   3809                 if (pOut->pStereoInfo != NULL)
   3810                 {
   3811                     pOut->pStereoInfo->rightSwizzle = 0;
   3812 
   3813                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
   3814                     {
   3815                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
   3816                         {
   3817                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
   3818                         }
   3819 
   3820                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
   3821                         {
   3822                             pOut->pStereoInfo->rightSwizzle |=
   3823                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
   3824                         }
   3825 
   3826                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
   3827                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
   3828                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
   3829                     }
   3830                 }
   3831             }
   3832         }
   3833     }
   3834     else
   3835     {
   3836         ADDR_ASSERT_ALWAYS();
   3837         returnCode = ADDR_ERROR;
   3838     }
   3839 
   3840     return returnCode;
   3841 }
   3842 
   3843 /**
   3844 ************************************************************************************************************************
   3845 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
   3846 *
   3847 *   @brief
   3848 *       Internal function to calculate alignment for tiled surface
   3849 *
   3850 *   @return
   3851 *       ADDR_E_RETURNCODE
   3852 ************************************************************************************************************************
   3853 */
   3854 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
   3855      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
   3856      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
   3857      ) const
   3858 {
   3859     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
   3860                                                                 &pOut->blockHeight,
   3861                                                                 &pOut->blockSlices,
   3862                                                                 pIn->bpp,
   3863                                                                 pIn->numFrags,
   3864                                                                 pIn->resourceType,
   3865                                                                 pIn->swizzleMode);
   3866 
   3867     if (returnCode == ADDR_OK)
   3868     {
   3869         UINT_32 pitchAlignInElement = pOut->blockWidth;
   3870 
   3871         if ((IsTex2d(pIn->resourceType) == TRUE) &&
   3872             (pIn->flags.display || pIn->flags.rotated) &&
   3873             (pIn->numMipLevels <= 1) &&
   3874             (pIn->numSamples <= 1) &&
   3875             (pIn->numFrags <= 1))
   3876         {
   3877             // Display engine needs pitch align to be at least 32 pixels.
   3878             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
   3879         }
   3880 
   3881         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
   3882 
   3883         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
   3884         {
   3885             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
   3886             {
   3887                 returnCode = ADDR_INVALIDPARAMS;
   3888             }
   3889             else if (pIn->pitchInElement < pOut->pitch)
   3890             {
   3891                 returnCode = ADDR_INVALIDPARAMS;
   3892             }
   3893             else
   3894             {
   3895                 pOut->pitch = pIn->pitchInElement;
   3896             }
   3897         }
   3898 
   3899         UINT_32 heightAlign = 0;
   3900 
   3901         if (pIn->flags.qbStereo)
   3902         {
   3903             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
   3904         }
   3905 
   3906         if (returnCode == ADDR_OK)
   3907         {
   3908             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
   3909 
   3910             if (heightAlign > 1)
   3911             {
   3912                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
   3913             }
   3914 
   3915             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
   3916 
   3917             pOut->epitchIsHeight   = FALSE;
   3918             pOut->mipChainInTail   = FALSE;
   3919             pOut->firstMipIdInTail = pIn->numMipLevels;
   3920 
   3921             pOut->mipChainPitch    = pOut->pitch;
   3922             pOut->mipChainHeight   = pOut->height;
   3923             pOut->mipChainSlice    = pOut->numSlices;
   3924 
   3925             if (pIn->numMipLevels > 1)
   3926             {
   3927                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
   3928                                                          pIn->swizzleMode,
   3929                                                          pIn->bpp,
   3930                                                          pIn->width,
   3931                                                          pIn->height,
   3932                                                          pIn->numSlices,
   3933                                                          pOut->blockWidth,
   3934                                                          pOut->blockHeight,
   3935                                                          pOut->blockSlices,
   3936                                                          pIn->numMipLevels,
   3937                                                          pOut->pMipInfo);
   3938 
   3939                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
   3940 
   3941                 if (endingMipId == 0)
   3942                 {
   3943                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
   3944                                                            pIn->swizzleMode,
   3945                                                            pOut->blockWidth,
   3946                                                            pOut->blockHeight,
   3947                                                            pOut->blockSlices);
   3948 
   3949                     pOut->epitchIsHeight = TRUE;
   3950                     pOut->pitch          = tailMaxDim.w;
   3951                     pOut->height         = tailMaxDim.h;
   3952                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
   3953                                            tailMaxDim.d : pIn->numSlices;
   3954                     pOut->mipChainInTail = TRUE;
   3955                 }
   3956                 else
   3957                 {
   3958                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
   3959                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
   3960 
   3961                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
   3962                                                            pIn->swizzleMode,
   3963                                                            mip0WidthInBlk,
   3964                                                            mip0HeightInBlk,
   3965                                                            pOut->numSlices / pOut->blockSlices);
   3966                     if (majorMode == ADDR_MAJOR_Y)
   3967                     {
   3968                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
   3969 
   3970                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
   3971                         {
   3972                             mip1WidthInBlk++;
   3973                         }
   3974 
   3975                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
   3976 
   3977                         pOut->epitchIsHeight = FALSE;
   3978                     }
   3979                     else
   3980                     {
   3981                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
   3982 
   3983                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
   3984                         {
   3985                             mip1HeightInBlk++;
   3986                         }
   3987 
   3988                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
   3989 
   3990                         pOut->epitchIsHeight = TRUE;
   3991                     }
   3992                 }
   3993 
   3994                 if (pOut->pMipInfo != NULL)
   3995                 {
   3996                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
   3997 
   3998                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
   3999                     {
   4000                         Dim3d   mipStartPos          = {0};
   4001                         UINT_32 mipTailOffsetInBytes = 0;
   4002 
   4003                         mipStartPos = GetMipStartPos(pIn->resourceType,
   4004                                                      pIn->swizzleMode,
   4005                                                      pOut->pitch,
   4006                                                      pOut->height,
   4007                                                      pOut->numSlices,
   4008                                                      pOut->blockWidth,
   4009                                                      pOut->blockHeight,
   4010                                                      pOut->blockSlices,
   4011                                                      i,
   4012                                                      elementBytesLog2,
   4013                                                      &mipTailOffsetInBytes);
   4014 
   4015                         UINT_32 pitchInBlock     =
   4016                             pOut->mipChainPitch / pOut->blockWidth;
   4017                         UINT_32 sliceInBlock     =
   4018                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
   4019                         UINT_64 blockIndex       =
   4020                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
   4021                         UINT_64 macroBlockOffset =
   4022                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
   4023 
   4024                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
   4025                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
   4026                     }
   4027                 }
   4028             }
   4029             else if (pOut->pMipInfo != NULL)
   4030             {
   4031                 pOut->pMipInfo[0].pitch  = pOut->pitch;
   4032                 pOut->pMipInfo[0].height = pOut->height;
   4033                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
   4034                 pOut->pMipInfo[0].offset = 0;
   4035             }
   4036 
   4037             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
   4038                               (pIn->bpp >> 3) * pIn->numFrags;
   4039             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
   4040             pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
   4041 
   4042             if (pIn->flags.prt)
   4043             {
   4044                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
   4045             }
   4046         }
   4047     }
   4048 
   4049     return returnCode;
   4050 }
   4051 
   4052 /**
   4053 ************************************************************************************************************************
   4054 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
   4055 *
   4056 *   @brief
   4057 *       Internal function to calculate alignment for linear surface
   4058 *
   4059 *   @return
   4060 *       ADDR_E_RETURNCODE
   4061 ************************************************************************************************************************
   4062 */
   4063 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
   4064      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
   4065      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
   4066      ) const
   4067 {
   4068     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
   4069     UINT_32           pitch        = 0;
   4070     UINT_32           actualHeight = 0;
   4071     UINT_32           elementBytes = pIn->bpp >> 3;
   4072     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
   4073 
   4074     if (IsTex1d(pIn->resourceType))
   4075     {
   4076         if (pIn->height > 1)
   4077         {
   4078             returnCode = ADDR_INVALIDPARAMS;
   4079         }
   4080         else
   4081         {
   4082             const UINT_32 pitchAlignInElement = alignment / elementBytes;
   4083 
   4084             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
   4085             actualHeight = pIn->numMipLevels;
   4086 
   4087             if (pIn->flags.prt == FALSE)
   4088             {
   4089                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
   4090                                                         &pitch, &actualHeight);
   4091             }
   4092 
   4093             if (returnCode == ADDR_OK)
   4094             {
   4095                 if (pOut->pMipInfo != NULL)
   4096                 {
   4097                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
   4098                     {
   4099                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
   4100                         pOut->pMipInfo[i].pitch  = pitch;
   4101                         pOut->pMipInfo[i].height = 1;
   4102                         pOut->pMipInfo[i].depth  = 1;
   4103                     }
   4104                 }
   4105             }
   4106         }
   4107     }
   4108     else
   4109     {
   4110         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
   4111     }
   4112 
   4113     if ((pitch == 0) || (actualHeight == 0))
   4114     {
   4115         returnCode = ADDR_INVALIDPARAMS;
   4116     }
   4117 
   4118     if (returnCode == ADDR_OK)
   4119     {
   4120         pOut->pitch          = pitch;
   4121         pOut->height         = pIn->height;
   4122         pOut->numSlices      = pIn->numSlices;
   4123         pOut->mipChainPitch  = pitch;
   4124         pOut->mipChainHeight = actualHeight;
   4125         pOut->mipChainSlice  = pOut->numSlices;
   4126         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
   4127         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
   4128         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
   4129         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
   4130         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
   4131         pOut->blockHeight    = 1;
   4132         pOut->blockSlices    = 1;
   4133     }
   4134 
   4135     // Post calculation validate
   4136     ADDR_ASSERT(pOut->sliceSize > 0);
   4137 
   4138     return returnCode;
   4139 }
   4140 
   4141 /**
   4142 ************************************************************************************************************************
   4143 *   Gfx9Lib::GetMipChainInfo
   4144 *
   4145 *   @brief
   4146 *       Internal function to get out information about mip chain
   4147 *
   4148 *   @return
   4149 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
   4150 ************************************************************************************************************************
   4151 */
   4152 UINT_32 Gfx9Lib::GetMipChainInfo(
   4153     AddrResourceType  resourceType,
   4154     AddrSwizzleMode   swizzleMode,
   4155     UINT_32           bpp,
   4156     UINT_32           mip0Width,
   4157     UINT_32           mip0Height,
   4158     UINT_32           mip0Depth,
   4159     UINT_32           blockWidth,
   4160     UINT_32           blockHeight,
   4161     UINT_32           blockDepth,
   4162     UINT_32           numMipLevel,
   4163     ADDR2_MIP_INFO*   pMipInfo) const
   4164 {
   4165     const Dim3d tailMaxDim =
   4166         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
   4167 
   4168     UINT_32 mipPitch         = mip0Width;
   4169     UINT_32 mipHeight        = mip0Height;
   4170     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
   4171     UINT_32 offset           = 0;
   4172     UINT_32 firstMipIdInTail = numMipLevel;
   4173     BOOL_32 inTail           = FALSE;
   4174     BOOL_32 finalDim         = FALSE;
   4175     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
   4176     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
   4177 
   4178     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
   4179     {
   4180         if (inTail)
   4181         {
   4182             if (finalDim == FALSE)
   4183             {
   4184                 UINT_32 mipSize;
   4185 
   4186                 if (is3dThick)
   4187                 {
   4188                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
   4189                 }
   4190                 else
   4191                 {
   4192                     mipSize = mipPitch * mipHeight * (bpp >> 3);
   4193                 }
   4194 
   4195                 if (mipSize <= 256)
   4196                 {
   4197                     UINT_32 index = Log2(bpp >> 3);
   4198 
   4199                     if (is3dThick)
   4200                     {
   4201                         mipPitch  = Block256_3dZ[index].w;
   4202                         mipHeight = Block256_3dZ[index].h;
   4203                         mipDepth  = Block256_3dZ[index].d;
   4204                     }
   4205                     else
   4206                     {
   4207                         mipPitch  = Block256_2d[index].w;
   4208                         mipHeight = Block256_2d[index].h;
   4209                     }
   4210 
   4211                     finalDim = TRUE;
   4212                 }
   4213             }
   4214         }
   4215         else
   4216         {
   4217             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
   4218                                  mipPitch, mipHeight, mipDepth);
   4219 
   4220             if (inTail)
   4221             {
   4222                 firstMipIdInTail = mipId;
   4223                 mipPitch         = tailMaxDim.w;
   4224                 mipHeight        = tailMaxDim.h;
   4225 
   4226                 if (is3dThick)
   4227                 {
   4228                     mipDepth = tailMaxDim.d;
   4229                 }
   4230             }
   4231             else
   4232             {
   4233                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
   4234                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
   4235 
   4236                 if (is3dThick)
   4237                 {
   4238                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
   4239                 }
   4240             }
   4241         }
   4242 
   4243         if (pMipInfo != NULL)
   4244         {
   4245             pMipInfo[mipId].pitch  = mipPitch;
   4246             pMipInfo[mipId].height = mipHeight;
   4247             pMipInfo[mipId].depth  = mipDepth;
   4248             pMipInfo[mipId].offset = offset;
   4249         }
   4250 
   4251         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
   4252 
   4253         if (finalDim)
   4254         {
   4255             if (is3dThin)
   4256             {
   4257                 mipDepth = Max(mipDepth >> 1, 1u);
   4258             }
   4259         }
   4260         else
   4261         {
   4262             mipPitch  = Max(mipPitch >> 1, 1u);
   4263             mipHeight = Max(mipHeight >> 1, 1u);
   4264 
   4265             if (is3dThick || is3dThin)
   4266             {
   4267                 mipDepth = Max(mipDepth >> 1, 1u);
   4268             }
   4269         }
   4270     }
   4271 
   4272     return firstMipIdInTail;
   4273 }
   4274 
   4275 /**
   4276 ************************************************************************************************************************
   4277 *   Gfx9Lib::GetMetaMiptailInfo
   4278 *
   4279 *   @brief
   4280 *       Get mip tail coordinate information.
   4281 *
   4282 *   @return
   4283 *       N/A
   4284 ************************************************************************************************************************
   4285 */
   4286 VOID Gfx9Lib::GetMetaMiptailInfo(
   4287     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
   4288     Dim3d                   mipCoord,       ///< [in] mip tail base coord
   4289     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
   4290     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
   4291     ) const
   4292 {
   4293     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
   4294     UINT_32 mipWidth  = pMetaBlkDim->w;
   4295     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
   4296     UINT_32 mipDepth  = pMetaBlkDim->d;
   4297     UINT_32 minInc;
   4298 
   4299     if (isThick)
   4300     {
   4301         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
   4302     }
   4303     else if (pMetaBlkDim->h >= 1024)
   4304     {
   4305         minInc = 256;
   4306     }
   4307     else if (pMetaBlkDim->h == 512)
   4308     {
   4309         minInc = 128;
   4310     }
   4311     else
   4312     {
   4313         minInc = 64;
   4314     }
   4315 
   4316     UINT_32 blk32MipId = 0xFFFFFFFF;
   4317 
   4318     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
   4319     {
   4320         pInfo[mip].inMiptail = TRUE;
   4321         pInfo[mip].startX = mipCoord.w;
   4322         pInfo[mip].startY = mipCoord.h;
   4323         pInfo[mip].startZ = mipCoord.d;
   4324         pInfo[mip].width = mipWidth;
   4325         pInfo[mip].height = mipHeight;
   4326         pInfo[mip].depth = mipDepth;
   4327 
   4328         if (mipWidth <= 32)
   4329         {
   4330             if (blk32MipId == 0xFFFFFFFF)
   4331             {
   4332                 blk32MipId = mip;
   4333             }
   4334 
   4335             mipCoord.w = pInfo[blk32MipId].startX;
   4336             mipCoord.h = pInfo[blk32MipId].startY;
   4337             mipCoord.d = pInfo[blk32MipId].startZ;
   4338 
   4339             switch (mip - blk32MipId)
   4340             {
   4341                 case 0:
   4342                     mipCoord.w += 32;       // 16x16
   4343                     break;
   4344                 case 1:
   4345                     mipCoord.h += 32;       // 8x8
   4346                     break;
   4347                 case 2:
   4348                     mipCoord.h += 32;       // 4x4
   4349                     mipCoord.w += 16;
   4350                     break;
   4351                 case 3:
   4352                     mipCoord.h += 32;       // 2x2
   4353                     mipCoord.w += 32;
   4354                     break;
   4355                 case 4:
   4356                     mipCoord.h += 32;       // 1x1
   4357                     mipCoord.w += 48;
   4358                     break;
   4359                 // The following are for BC/ASTC formats
   4360                 case 5:
   4361                     mipCoord.h += 48;       // 1/2 x 1/2
   4362                     break;
   4363                 case 6:
   4364                     mipCoord.h += 48;       // 1/4 x 1/4
   4365                     mipCoord.w += 16;
   4366                     break;
   4367                 case 7:
   4368                     mipCoord.h += 48;       // 1/8 x 1/8
   4369                     mipCoord.w += 32;
   4370                     break;
   4371                 case 8:
   4372                     mipCoord.h += 48;       // 1/16 x 1/16
   4373                     mipCoord.w += 48;
   4374                     break;
   4375                 default:
   4376                     ADDR_ASSERT_ALWAYS();
   4377                     break;
   4378             }
   4379 
   4380             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
   4381             mipHeight = mipWidth;
   4382 
   4383             if (isThick)
   4384             {
   4385                 mipDepth = mipWidth;
   4386             }
   4387         }
   4388         else
   4389         {
   4390             if (mipWidth <= minInc)
   4391             {
   4392                 // if we're below the minimal increment...
   4393                 if (isThick)
   4394                 {
   4395                     // For 3d, just go in z direction
   4396                     mipCoord.d += mipDepth;
   4397                 }
   4398                 else
   4399                 {
   4400                     // For 2d, first go across, then down
   4401                     if ((mipWidth * 2) == minInc)
   4402                     {
   4403                         // if we're 2 mips below, that's when we go back in x, and down in y
   4404                         mipCoord.w -= minInc;
   4405                         mipCoord.h += minInc;
   4406                     }
   4407                     else
   4408                     {
   4409                         // otherwise, just go across in x
   4410                         mipCoord.w += minInc;
   4411                     }
   4412                 }
   4413             }
   4414             else
   4415             {
   4416                 // On even mip, go down, otherwise, go across
   4417                 if (mip & 1)
   4418                 {
   4419                     mipCoord.w += mipWidth;
   4420                 }
   4421                 else
   4422                 {
   4423                     mipCoord.h += mipHeight;
   4424                 }
   4425             }
   4426             // Divide the width by 2
   4427             mipWidth >>= 1;
   4428             // After the first mip in tail, the mip is always a square
   4429             mipHeight = mipWidth;
   4430             // ...or for 3d, a cube
   4431             if (isThick)
   4432             {
   4433                 mipDepth = mipWidth;
   4434             }
   4435         }
   4436     }
   4437 }
   4438 
   4439 /**
   4440 ************************************************************************************************************************
   4441 *   Gfx9Lib::GetMipStartPos
   4442 *
   4443 *   @brief
   4444 *       Internal function to get out information about mip logical start position
   4445 *
   4446 *   @return
   4447 *       logical start position in macro block width/heith/depth of one mip level within one slice
   4448 ************************************************************************************************************************
   4449 */
   4450 Dim3d Gfx9Lib::GetMipStartPos(
   4451     AddrResourceType  resourceType,
   4452     AddrSwizzleMode   swizzleMode,
   4453     UINT_32           width,
   4454     UINT_32           height,
   4455     UINT_32           depth,
   4456     UINT_32           blockWidth,
   4457     UINT_32           blockHeight,
   4458     UINT_32           blockDepth,
   4459     UINT_32           mipId,
   4460     UINT_32           log2ElementBytes,
   4461     UINT_32*          pMipTailBytesOffset) const
   4462 {
   4463     Dim3d       mipStartPos = {0};
   4464     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
   4465 
   4466     // Report mip in tail if Mip0 is already in mip tail
   4467     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
   4468     UINT_32 log2blkSize    = GetBlockSizeLog2(swizzleMode);
   4469     UINT_32 mipIndexInTail = mipId;
   4470 
   4471     if (inMipTail == FALSE)
   4472     {
   4473         // Mip 0 dimension, unit in block
   4474         UINT_32 mipWidthInBlk   = width  / blockWidth;
   4475         UINT_32 mipHeightInBlk  = height / blockHeight;
   4476         UINT_32 mipDepthInBlk   = depth  / blockDepth;
   4477         AddrMajorMode majorMode = GetMajorMode(resourceType,
   4478                                                swizzleMode,
   4479                                                mipWidthInBlk,
   4480                                                mipHeightInBlk,
   4481                                                mipDepthInBlk);
   4482 
   4483         UINT_32 endingMip = mipId + 1;
   4484 
   4485         for (UINT_32 i = 1; i <= mipId; i++)
   4486         {
   4487             if ((i == 1) || (i == 3))
   4488             {
   4489                 if (majorMode == ADDR_MAJOR_Y)
   4490                 {
   4491                     mipStartPos.w += mipWidthInBlk;
   4492                 }
   4493                 else
   4494                 {
   4495                     mipStartPos.h += mipHeightInBlk;
   4496                 }
   4497             }
   4498             else
   4499             {
   4500                 if (majorMode == ADDR_MAJOR_X)
   4501                 {
   4502                    mipStartPos.w += mipWidthInBlk;
   4503                 }
   4504                 else if (majorMode == ADDR_MAJOR_Y)
   4505                 {
   4506                    mipStartPos.h += mipHeightInBlk;
   4507                 }
   4508                 else
   4509                 {
   4510                    mipStartPos.d += mipDepthInBlk;
   4511                 }
   4512             }
   4513 
   4514             BOOL_32 inTail = FALSE;
   4515 
   4516             if (IsThick(resourceType, swizzleMode))
   4517             {
   4518                 UINT_32 dim = log2blkSize % 3;
   4519 
   4520                 if (dim == 0)
   4521                 {
   4522                     inTail =
   4523                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
   4524                 }
   4525                 else if (dim == 1)
   4526                 {
   4527                     inTail =
   4528                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
   4529                 }
   4530                 else
   4531                 {
   4532                     inTail =
   4533                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
   4534                 }
   4535             }
   4536             else
   4537             {
   4538                 if (log2blkSize & 1)
   4539                 {
   4540                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
   4541                 }
   4542                 else
   4543                 {
   4544                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
   4545                 }
   4546             }
   4547 
   4548             if (inTail)
   4549             {
   4550                 endingMip = i;
   4551                 break;
   4552             }
   4553 
   4554             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
   4555             mipHeightInBlk = RoundHalf(mipHeightInBlk);
   4556             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
   4557         }
   4558 
   4559         if (mipId >= endingMip)
   4560         {
   4561             inMipTail      = TRUE;
   4562             mipIndexInTail = mipId - endingMip;
   4563         }
   4564     }
   4565 
   4566     if (inMipTail)
   4567     {
   4568         UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
   4569         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
   4570         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
   4571     }
   4572 
   4573     return mipStartPos;
   4574 }
   4575 
   4576 /**
   4577 ************************************************************************************************************************
   4578 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
   4579 *
   4580 *   @brief
   4581 *       Internal function to calculate address from coord for tiled swizzle surface
   4582 *
   4583 *   @return
   4584 *       ADDR_E_RETURNCODE
   4585 ************************************************************************************************************************
   4586 */
   4587 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
   4588      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
   4589      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
   4590      ) const
   4591 {
   4592     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
   4593     localIn.swizzleMode  = pIn->swizzleMode;
   4594     localIn.flags        = pIn->flags;
   4595     localIn.resourceType = pIn->resourceType;
   4596     localIn.bpp          = pIn->bpp;
   4597     localIn.width        = Max(pIn->unalignedWidth, 1u);
   4598     localIn.height       = Max(pIn->unalignedHeight, 1u);
   4599     localIn.numSlices    = Max(pIn->numSlices, 1u);
   4600     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
   4601     localIn.numSamples   = Max(pIn->numSamples, 1u);
   4602     localIn.numFrags     = Max(pIn->numFrags, 1u);
   4603     if (localIn.numMipLevels <= 1)
   4604     {
   4605         localIn.pitchInElement = pIn->pitchInElement;
   4606     }
   4607 
   4608     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
   4609     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
   4610 
   4611     BOOL_32 valid = (returnCode == ADDR_OK) &&
   4612                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
   4613                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
   4614                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
   4615 
   4616     if (valid)
   4617     {
   4618         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
   4619         Dim3d   mipStartPos        = {0};
   4620         UINT_32 mipTailBytesOffset = 0;
   4621 
   4622         if (pIn->numMipLevels > 1)
   4623         {
   4624             // Mip-map chain cannot be MSAA surface
   4625             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
   4626 
   4627             mipStartPos = GetMipStartPos(pIn->resourceType,
   4628                                          pIn->swizzleMode,
   4629                                          localOut.pitch,
   4630                                          localOut.height,
   4631                                          localOut.numSlices,
   4632                                          localOut.blockWidth,
   4633                                          localOut.blockHeight,
   4634                                          localOut.blockSlices,
   4635                                          pIn->mipId,
   4636                                          log2ElementBytes,
   4637                                          &mipTailBytesOffset);
   4638         }
   4639 
   4640         UINT_32 interleaveOffset = 0;
   4641         UINT_32 pipeBits = 0;
   4642         UINT_32 pipeXor = 0;
   4643         UINT_32 bankBits = 0;
   4644         UINT_32 bankXor = 0;
   4645 
   4646         if (IsThin(pIn->resourceType, pIn->swizzleMode))
   4647         {
   4648             UINT_32 blockOffset = 0;
   4649             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
   4650 
   4651             if (IsZOrderSwizzle(pIn->swizzleMode))
   4652             {
   4653                 // Morton generation
   4654                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
   4655                 {
   4656                     UINT_32 totalLowBits = 6 - log2ElementBytes;
   4657                     UINT_32 mortBits = totalLowBits / 2;
   4658                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
   4659                     // Are 9 bits enough?
   4660                     UINT_32 highBitsValue =
   4661                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
   4662                     blockOffset = lowBitsValue | highBitsValue;
   4663                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
   4664                 }
   4665                 else
   4666                 {
   4667                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
   4668                 }
   4669 
   4670                 // Fill LSBs with sample bits
   4671                 if (pIn->numSamples > 1)
   4672                 {
   4673                     blockOffset *= pIn->numSamples;
   4674                     blockOffset |= pIn->sample;
   4675                 }
   4676 
   4677                 // Shift according to BytesPP
   4678                 blockOffset <<= log2ElementBytes;
   4679             }
   4680             else
   4681             {
   4682                 // Micro block offset
   4683                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
   4684                 blockOffset = microBlockOffset;
   4685 
   4686                 // Micro block dimension
   4687                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
   4688                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
   4689                 // Morton generation, does 12 bit enough?
   4690                 blockOffset |=
   4691                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
   4692 
   4693                 // Sample bits start location
   4694                 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
   4695                 // Join sample bits information to the highest Macro block bits
   4696                 if (IsNonPrtXor(pIn->swizzleMode))
   4697                 {
   4698                     // Non-prt-Xor : xor highest Macro block bits with sample bits
   4699                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
   4700                 }
   4701                 else
   4702                 {
   4703                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
   4704                     // after this op, the blockOffset only contains log2 Macro block size bits
   4705                     blockOffset %= (1 << sampleStart);
   4706                     blockOffset |= (pIn->sample << sampleStart);
   4707                     ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
   4708                 }
   4709             }
   4710 
   4711             if (IsXor(pIn->swizzleMode))
   4712             {
   4713                 // Mask off bits above Macro block bits to keep page synonyms working for prt
   4714                 if (IsPrt(pIn->swizzleMode))
   4715                 {
   4716                     blockOffset &= ((1 << log2blkSize) - 1);
   4717                 }
   4718 
   4719                 // Preserve offset inside pipe interleave
   4720                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
   4721                 blockOffset >>= m_pipeInterleaveLog2;
   4722 
   4723                 // Pipe/Se xor bits
   4724                 pipeBits = GetPipeXorBits(log2blkSize);
   4725                 // Pipe xor
   4726                 pipeXor = FoldXor2d(blockOffset, pipeBits);
   4727                 blockOffset >>= pipeBits;
   4728 
   4729                 // Bank xor bits
   4730                 bankBits = GetBankXorBits(log2blkSize);
   4731                 // Bank Xor
   4732                 bankXor = FoldXor2d(blockOffset, bankBits);
   4733                 blockOffset >>= bankBits;
   4734 
   4735                 // Put all the part back together
   4736                 blockOffset <<= bankBits;
   4737                 blockOffset |= bankXor;
   4738                 blockOffset <<= pipeBits;
   4739                 blockOffset |= pipeXor;
   4740                 blockOffset <<= m_pipeInterleaveLog2;
   4741                 blockOffset |= interleaveOffset;
   4742             }
   4743 
   4744             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
   4745             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
   4746 
   4747             blockOffset |= mipTailBytesOffset;
   4748 
   4749             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
   4750             {
   4751                 // Apply slice xor if not MSAA/PRT
   4752                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
   4753                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
   4754                                 (m_pipeInterleaveLog2 + pipeBits));
   4755             }
   4756 
   4757             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
   4758                                                   bankBits, pipeBits, &blockOffset);
   4759 
   4760             blockOffset %= (1 << log2blkSize);
   4761 
   4762             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
   4763             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
   4764             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
   4765             UINT_32 macroBlockIndex =
   4766                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
   4767                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
   4768                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
   4769 
   4770             UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
   4771                                        GetBlockSizeLog2(pIn->swizzleMode));
   4772 
   4773             pOut->addr = blockOffset | macroBlockOffset;
   4774         }
   4775         else
   4776         {
   4777             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
   4778 
   4779             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
   4780 
   4781             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
   4782                                               (pIn->y / microBlockDim.h),
   4783                                               (pIn->slice / microBlockDim.d),
   4784                                               8);
   4785 
   4786             blockOffset <<= 10;
   4787             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
   4788 
   4789             if (IsXor(pIn->swizzleMode))
   4790             {
   4791                 // Mask off bits above Macro block bits to keep page synonyms working for prt
   4792                 if (IsPrt(pIn->swizzleMode))
   4793                 {
   4794                     blockOffset &= ((1 << log2blkSize) - 1);
   4795                 }
   4796 
   4797                 // Preserve offset inside pipe interleave
   4798                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
   4799                 blockOffset >>= m_pipeInterleaveLog2;
   4800 
   4801                 // Pipe/Se xor bits
   4802                 pipeBits = GetPipeXorBits(log2blkSize);
   4803                 // Pipe xor
   4804                 pipeXor = FoldXor3d(blockOffset, pipeBits);
   4805                 blockOffset >>= pipeBits;
   4806 
   4807                 // Bank xor bits
   4808                 bankBits = GetBankXorBits(log2blkSize);
   4809                 // Bank Xor
   4810                 bankXor = FoldXor3d(blockOffset, bankBits);
   4811                 blockOffset >>= bankBits;
   4812 
   4813                 // Put all the part back together
   4814                 blockOffset <<= bankBits;
   4815                 blockOffset |= bankXor;
   4816                 blockOffset <<= pipeBits;
   4817                 blockOffset |= pipeXor;
   4818                 blockOffset <<= m_pipeInterleaveLog2;
   4819                 blockOffset |= interleaveOffset;
   4820             }
   4821 
   4822             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
   4823             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
   4824             blockOffset |= mipTailBytesOffset;
   4825 
   4826             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
   4827                                                   bankBits, pipeBits, &blockOffset);
   4828 
   4829             blockOffset %= (1 << log2blkSize);
   4830 
   4831             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
   4832             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
   4833             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
   4834 
   4835             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
   4836             UINT_32 sliceSizeInBlock =
   4837                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
   4838             UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
   4839 
   4840             pOut->addr = blockOffset | (blockIndex << log2blkSize);
   4841         }
   4842     }
   4843     else
   4844     {
   4845         returnCode = ADDR_INVALIDPARAMS;
   4846     }
   4847 
   4848     return returnCode;
   4849 }
   4850 
   4851 /**
   4852 ************************************************************************************************************************
   4853 *   Gfx9Lib::ComputeSurfaceInfoLinear
   4854 *
   4855 *   @brief
   4856 *       Internal function to calculate padding for linear swizzle 2D/3D surface
   4857 *
   4858 *   @return
   4859 *       N/A
   4860 ************************************************************************************************************************
   4861 */
   4862 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
   4863     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
   4864     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
   4865     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
   4866     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
   4867     ) const
   4868 {
   4869     ADDR_E_RETURNCODE returnCode = ADDR_OK;
   4870 
   4871     UINT_32 elementBytes        = pIn->bpp >> 3;
   4872     UINT_32 pitchAlignInElement = 0;
   4873 
   4874     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
   4875     {
   4876         ADDR_ASSERT(pIn->numMipLevels <= 1);
   4877         ADDR_ASSERT(pIn->numSlices <= 1);
   4878         pitchAlignInElement = 1;
   4879     }
   4880     else
   4881     {
   4882         pitchAlignInElement = (256 / elementBytes);
   4883     }
   4884 
   4885     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
   4886     UINT_32 slice0PaddedHeight = pIn->height;
   4887 
   4888     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
   4889                                             &mipChainWidth, &slice0PaddedHeight);
   4890 
   4891     if (returnCode == ADDR_OK)
   4892     {
   4893         UINT_32 mipChainHeight = 0;
   4894         UINT_32 mipHeight      = pIn->height;
   4895 
   4896         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
   4897         {
   4898             if (pMipInfo != NULL)
   4899             {
   4900                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
   4901                 pMipInfo[i].pitch  = mipChainWidth;
   4902                 pMipInfo[i].height = mipHeight;
   4903                 pMipInfo[i].depth  = 1;
   4904             }
   4905 
   4906             mipChainHeight += mipHeight;
   4907             mipHeight = RoundHalf(mipHeight);
   4908             mipHeight = Max(mipHeight, 1u);
   4909         }
   4910 
   4911         *pMipmap0PaddedWidth = mipChainWidth;
   4912         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
   4913     }
   4914 
   4915     return returnCode;
   4916 }
   4917 
   4918 } // V2
   4919 } // Addr
   4920