/*
 * Copyright 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

/**
************************************************************************************************************************
* @file  gfx9addrlib.cpp
* @brief Contains the implementation for the Gfx9Lib class.
************************************************************************************************************************
*/

#include "gfx9addrlib.h"

#include "gfx9_gb_reg.h"

#include "amdgpu_asic_addr.h"

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

namespace Addr
{

/**
************************************************************************************************************************
*   Gfx9HwlInit
*
*   @brief
*       Creates a Gfx9Lib object by forwarding the client to V2::Gfx9Lib::CreateObj.
*
*   @return
*       Returns a Gfx9Lib object pointer.
************************************************************************************************************************
*/
Addr::Lib* Gfx9HwlInit(const Client* pClient)
{
    return V2::Gfx9Lib::CreateObj(pClient);
}

namespace V2
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Static Const Member
////////////////////////////////////////////////////////////////////////////////////////////////////

// One row of flags per swizzle mode (ADDR_SW_MAX_TYPE rows). The column order of every row is
// given by the header comment directly below; the trailing comment on each row names the mode.
// The constructor copies this table into m_swizzleModeTable.
const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{
    //Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
    {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
    {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
    {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
    {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R

    {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
    {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
    {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
    {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R

    {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
    {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
    {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
    {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R

    {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
    {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
    {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
    {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R

    {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
    {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
    {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
    {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T

    {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_X
    {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_X
    {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_X
    {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_X

    {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
    {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
    {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
    {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X

    {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
    {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
    {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
    {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
    {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
};

// NOTE(review): presumably the per-mip offsets inside a 256B-block mip tail - the users of this
// table are outside this part of the file; confirm the unit before relying on it.
const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
                                              8, 6, 5, 4, 3, 2, 1, 0};

// NOTE(review): the names suggest 256B block dimensions {w, h, d} for 3D "S" swizzle modes,
// one entry per element size - confirm against the code that indexes this table.
const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};

// NOTE(review): same layout as Block256_3dS but presumably for 3D "Z" swizzle modes - confirm.
const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};

/**
************************************************************************************************************************
*   Gfx9Lib::Gfx9Lib
*
*   @brief
*       Constructor
*
************************************************************************************************************************
*/
Gfx9Lib::Gfx9Lib(const Client* pClient)
    :
    Lib(pClient),
    m_numEquations(0)
{
    m_class = AI_ADDRLIB;
    // Start with every chip setting cleared and a private copy of the static swizzle mode table
    memset(&m_settings, 0, sizeof(m_settings));
    memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
}

/**
************************************************************************************************************************
*   Gfx9Lib::~Gfx9Lib
*
*   @brief
*       Destructor
************************************************************************************************************************
*/
Gfx9Lib::~Gfx9Lib()
{
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeHtileInfo
*
*   @brief
*       Interface function stub of AddrComputeHtileInfo. Derives the HTILE meta block dimensions
*       from the pipe / RB configuration and fills in pitch, height, slice size, total size,
*       meta block dimensions and base alignment in pOut.
*
*   @return
*       ADDR_E_RETURNCODE (this implementation always returns ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
    const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
                                                       pIn->swizzleMode);

    // All render backends are counted only when RB-aligned metadata was requested
    UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;

    UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;

    if ((numPipeTotal == 1) && (numRbTotal == 1))
    {
        // Single pipe and single RB: fixed 2^10 compressed blocks per meta block
        numCompressBlkPerMetaBlkLog2 = 10;
    }
    else
    {
        // Otherwise scale with the number of SEs and RBs per SE; with the alias fix the base
        // exponent is raised to the pipe interleave size when that is larger than 10
        if (m_settings.applyAliasFix)
        {
            numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
        }
        else
        {
            numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
        }
    }

    numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;

    // Start from an 8x8x1 block and split the log2 amplification between width and height.
    // With a mip chain the width gets the rounded-down half, otherwise RoundHalf() decides.
    Dim3d   metaBlkDim   = {8, 8, 1};
    UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
    UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
    UINT_32 heightAmp    = totalAmpBits - widthAmp;
    metaBlkDim.w <<= widthAmp;
    metaBlkDim.h <<= heightAmp;

#if DEBUG
    // Debug-only cross-check: grow an 8x8 block one doubling at a time and verify that the
    // result matches the closed-form dimensions computed above.
    Dim3d metaBlkDimDbg = {8, 8, 1};
    for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
    {
        if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
            ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
        {
            metaBlkDimDbg.h <<= 1;
        }
        else
        {
            metaBlkDimDbg.w <<= 1;
        }
    }
    ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
#endif

    UINT_32 numMetaBlkX;
    UINT_32 numMetaBlkY;
    UINT_32 numMetaBlkZ;

    // HTILE is always treated as a thin (dataThick == FALSE) surface here
    GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
                   pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
                   &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);

    UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;

    if (m_settings.htileAlignFix)
    {
        sizeAlign <<= 1;
    }

    pOut->pitch      = numMetaBlkX * metaBlkDim.w;
    pOut->height     = numMetaBlkY * metaBlkDim.h;
    // NOTE(review): the factor 4 is presumably the HTILE bytes per compressed block - confirm
    pOut->sliceSize  = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;

    pOut->metaBlkWidth = metaBlkDim.w;
    pOut->metaBlkHeight = metaBlkDim.h;
    pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;

    // Base alignment is computed before the non-XOR adjustment of sizeAlign below
    pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);

    if (m_settings.metaBaseAlignFix)
    {
        pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
    }

    // Non-XOR swizzle modes with more than two pipes may need a larger size alignment
    if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
    {
        UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;

        if (additionalAlign > sizeAlign)
        {
            sizeAlign = additionalAlign;
        }
    }

    pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeCmaskInfo
*
*   @brief
*       Interface function stub of AddrComputeCmaskInfo. Derives the CMASK meta block dimensions
*       from the pipe / RB configuration and fills in pitch, height, slice size, total size,
*       meta block dimensions and base alignment in pOut.
*
*   @return
*       ADDR_E_RETURNCODE (this implementation always returns ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
    const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // TODO: Clarify with AddrLib team
    // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);

    UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
                                                       pIn->swizzleMode);

    // All render backends are counted only when RB-aligned metadata was requested
    UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;

    UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;

    if ((numPipeTotal == 1) && (numRbTotal == 1))
    {
        // Single pipe and single RB: fixed 2^13 compressed blocks per meta block
        numCompressBlkPerMetaBlkLog2 = 13;
    }
    else
    {
        if (m_settings.applyAliasFix)
        {
            numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
        }
        else
        {
            numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
        }

        // Never drop below the single pipe / single RB size
        numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
    }

    numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;

    // Start from an 8x8 block; the height gets the rounded-down half of the amplification bits
    // and the width gets the remainder
    Dim2d   metaBlkDim   = {8, 8};
    UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
    UINT_32 heightAmp    = totalAmpBits >> 1;
    UINT_32 widthAmp     = totalAmpBits - heightAmp;
    metaBlkDim.w <<= widthAmp;
    metaBlkDim.h <<= heightAmp;

#if DEBUG
    // Debug-only cross-check: grow an 8x8 block one doubling at a time and verify that the
    // result matches the closed-form dimensions computed above.
    Dim2d metaBlkDimDbg = {8, 8};
    for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
    {
        if (metaBlkDimDbg.h < metaBlkDimDbg.w)
        {
            metaBlkDimDbg.h <<= 1;
        }
        else
        {
            metaBlkDimDbg.w <<= 1;
        }
    }
    ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
#endif

    // Round the surface up to whole meta blocks; every slice is one meta block deep
    UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
    UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
    UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);

    UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;

    pOut->pitch      = numMetaBlkX * metaBlkDim.w;
    pOut->height     = numMetaBlkY * metaBlkDim.h;
    // NOTE(review): the ">> 1" presumably reflects half a byte of CMASK per compressed block - confirm
    pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
    pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
    pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);

    if (m_settings.metaBaseAlignFix)
    {
        pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
    }

    pOut->metaBlkWidth = metaBlkDim.w;
    pOut->metaBlkHeight = metaBlkDim.h;

    pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::GetMetaMipInfo
*
*   @brief
*       Get meta mip info. Computes the number of meta blocks needed in X/Y/Z for the whole
*       mip chain and, when pInfo is not NULL, the placement and size of each mip level.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::GetMetaMipInfo(
    UINT_32 numMipLevels,           ///< [in]  number of mip levels
    Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
    BOOL_32 dataThick,              ///< [in]  data surface is thick
    ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info (may be NULL)
    UINT_32 mip0Width,              ///< [in]  mip0 width
    UINT_32 mip0Height,             ///< [in]  mip0 height
    UINT_32 mip0Depth,              ///< [in]  mip0 depth
    UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
    UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
    UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
    const
{
    // Meta blocks needed to cover mip 0 in each direction (rounded up)
    UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
    UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
    UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
    // A mip is in the tail once it fits in one meta block wide by half a meta block high
    // (and, for thick data, one meta block deep)
    UINT_32 tailWidth   = pMetaBlkDim->w;
    UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
    UINT_32 tailDepth   = pMetaBlkDim->d;
    BOOL_32 inTail      = FALSE;
    AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;

    if (numMipLevels > 1)
    {
        // Pick the major direction from the direction with the most meta blocks
        if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
        {
            // Z major
            major = ADDR_MAJOR_Z;
        }
        else if (numMetaBlkX >= numMetaBlkY)
        {
            // X major
            major = ADDR_MAJOR_X;
        }
        else
        {
            // Y major
            major = ADDR_MAJOR_Y;
        }

        inTail = ((mip0Width <= tailWidth) &&
                  (mip0Height <= tailHeight) &&
                  ((dataThick == FALSE) || (mip0Depth <= tailDepth)));

        if (inTail == FALSE)
        {
            // pOrderDim is the block count along the major direction; pMipDim is the block
            // count that is enlarged below to make room for the lower mip levels
            UINT_32 orderLimit;
            UINT_32 *pMipDim;
            UINT_32 *pOrderDim;

            if (major == ADDR_MAJOR_Z)
            {
                // Z major
                pMipDim = &numMetaBlkY;
                pOrderDim = &numMetaBlkZ;
                orderLimit = 4;
            }
            else if (major == ADDR_MAJOR_X)
            {
                // X major
                pMipDim = &numMetaBlkY;
                pOrderDim = &numMetaBlkX;
                orderLimit = 4;
            }
            else
            {
                // Y major
                pMipDim = &numMetaBlkX;
                pOrderDim = &numMetaBlkY;
                orderLimit = 2;
            }

            if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
            {
                *pMipDim += 2;
            }
            else
            {
                // Grow by half of the current count, rounded up
                *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
            }
        }
    }

    if (pInfo != NULL)
    {
        UINT_32 mipWidth  = mip0Width;
        UINT_32 mipHeight = mip0Height;
        UINT_32 mipDepth  = mip0Depth;
        Dim3d   mipCoord  = {0};

        for (UINT_32 mip = 0; mip < numMipLevels; mip++)
        {
            if (inTail)
            {
                // All remaining levels are described by the mip tail helper in one call
                GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
                                   pMetaBlkDim);
                break;
            }
            else
            {
                mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
                mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
                mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);

                pInfo[mip].inMiptail = FALSE;
                pInfo[mip].startX = mipCoord.w;
                pInfo[mip].startY = mipCoord.h;
                pInfo[mip].startZ = mipCoord.d;
                pInfo[mip].width = mipWidth;
                pInfo[mip].height = mipHeight;
                pInfo[mip].depth = dataThick ? mipDepth : 1;

                // Advance the start coordinate of the next level: odd levels and every level
                // from 3 onwards step along the major direction, levels 0 and 2 step across it
                if ((mip >= 3) || (mip & 1))
                {
                    switch (major)
                    {
                        case ADDR_MAJOR_X:
                            mipCoord.w += mipWidth;
                            break;
                        case ADDR_MAJOR_Y:
                            mipCoord.h += mipHeight;
                            break;
                        case ADDR_MAJOR_Z:
                            mipCoord.d += mipDepth;
                            break;
                        default:
                            break;
                    }
                }
                else
                {
                    switch (major)
                    {
                        case ADDR_MAJOR_X:
                            mipCoord.h += mipHeight;
                            break;
                        case ADDR_MAJOR_Y:
                            mipCoord.w += mipWidth;
                            break;
                        case ADDR_MAJOR_Z:
                            mipCoord.h += mipHeight;
                            break;
                        default:
                            break;
                    }
                }

                // Next level is half the size in every direction, never smaller than 1
                mipWidth  = Max(mipWidth >> 1, 1u);
                mipHeight = Max(mipHeight >> 1, 1u);
                mipDepth  = Max(mipDepth >> 1, 1u);

                inTail = ((mipWidth <= tailWidth) &&
                          (mipHeight <= tailHeight) &&
                          ((dataThick == FALSE) || (mipDepth <= tailDepth)));
            }
        }
    }

    *pNumMetaBlkX = numMetaBlkX;
    *pNumMetaBlkY = numMetaBlkY;
    *pNumMetaBlkZ = numMetaBlkZ;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeDccInfo
*
*   @brief
*       Interface function to compute DCC key info: DCC RAM size and base alignment, compress
*       block and meta block dimensions, and the aligned pitch / height / depth.
*
*   @return
*       ADDR_E_RETURNCODE (this implementation always returns ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
    const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
    ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
    BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
    BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;

    // Linear data forces linear metadata; explicitly linear metadata is never pipe aligned
    if (dataLinear)
    {
        metaLinear = TRUE;
    }
    else if (metaLinear == TRUE)
    {
        pipeAligned = FALSE;
    }

    UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);

    if (metaLinear)
    {
        // Linear metadata support was removed for GFX9! No one can use this feature on GFX9.
        ADDR_ASSERT_ALWAYS();

        pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
        pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
    }
    else
    {
        BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);

        UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;

        UINT_32 numFrags = Max(pIn->numFrags, 1u);
        UINT_32 numSlices = Max(pIn->numSlices, 1u);

        minMetaBlkSize /= numFrags;

        UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;

        UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;

        if ((numPipeTotal > 1) || (numRbTotal > 1))
        {
            const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);

            // Scale with the total RB count, then cap at 65536 * bpp
            numCompressBlkPerMetaBlk =
                Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));

            if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
            {
                numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
            }
        }

        Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
        Dim3d metaBlkDim = compressBlkDim;

        // Grow the meta block from one compress block by doubling one dimension per step until
        // it holds numCompressBlkPerMetaBlk compress blocks. The smaller of w / h is doubled
        // (h wins ties when there is a mip chain); thick data doubles depth instead whenever
        // the chosen dimension is already larger than the depth.
        for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
        {
            if ((metaBlkDim.h < metaBlkDim.w) ||
                ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
            {
                if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
                {
                    metaBlkDim.h <<= 1;
                }
                else
                {
                    metaBlkDim.d <<= 1;
                }
            }
            else
            {
                if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
                {
                    metaBlkDim.w <<= 1;
                }
                else
                {
                    metaBlkDim.d <<= 1;
                }
            }
        }

        UINT_32 numMetaBlkX;
        UINT_32 numMetaBlkY;
        UINT_32 numMetaBlkZ;

        GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
                       pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
                       &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);

        UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;

        // More fragments than the maximum compressed fragment count scale the size alignment
        if (numFrags > m_maxCompFrag)
        {
            sizeAlign *= (numFrags / m_maxCompFrag);
        }

        pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
                           numCompressBlkPerMetaBlk * numFrags;
        pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
        pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);

        if (m_settings.metaBaseAlignFix)
        {
            pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
        }

        pOut->pitch = numMetaBlkX * metaBlkDim.w;
        pOut->height = numMetaBlkY * metaBlkDim.h;
        pOut->depth = numMetaBlkZ * metaBlkDim.d;

        pOut->compressBlkWidth = compressBlkDim.w;
        pOut->compressBlkHeight = compressBlkDim.h;
        pOut->compressBlkDepth = compressBlkDim.d;

        pOut->metaBlkWidth = metaBlkDim.w;
        pOut->metaBlkHeight = metaBlkDim.h;
        pOut->metaBlkDepth = metaBlkDim.d;

        pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
        pOut->fastClearSizePerSlice =
            pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
    }

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlGetMaxAlignments
*
*   @brief
*       Gets maximum alignments; reports the surface base alignment of ADDR_SW_64KB.
*   @return
*       ADDR_E_RETURNCODE (this implementation always returns ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
    ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut    ///< [out] output structure
    ) const
{
    pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);

    return ADDR_OK;
}

/**
************************************************************************************************************************
Gfx9Lib::HwlComputeCmaskAddrFromCoord 693 * 694 * @brief 695 * Interface function stub of AddrComputeCmaskAddrFromCoord 696 * 697 * @return 698 * ADDR_E_RETURNCODE 699 ************************************************************************************************************************ 700 */ 701 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord( 702 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 703 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure 704 { 705 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0}; 706 input.size = sizeof(input); 707 input.cMaskFlags = pIn->cMaskFlags; 708 input.colorFlags = pIn->colorFlags; 709 input.unalignedWidth = Max(pIn->unalignedWidth, 1u); 710 input.unalignedHeight = Max(pIn->unalignedHeight, 1u); 711 input.numSlices = Max(pIn->numSlices, 1u); 712 input.swizzleMode = pIn->swizzleMode; 713 input.resourceType = pIn->resourceType; 714 715 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0}; 716 output.size = sizeof(output); 717 718 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output); 719 720 if (returnCode == ADDR_OK) 721 { 722 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); 723 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3); 724 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); 725 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); 726 727 const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, 728 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, 729 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); 730 731 UINT_32 xb = pIn->x / output.metaBlkWidth; 732 UINT_32 yb = pIn->y / output.metaBlkHeight; 733 UINT_32 zb = pIn->slice; 734 735 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; 736 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; 737 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; 738 739 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, 
pIn->slice, 0, blockIndex); 740 741 pOut->addr = address >> 1; 742 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2); 743 744 745 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned, 746 pIn->swizzleMode); 747 748 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1)); 749 750 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); 751 } 752 753 return returnCode; 754 } 755 756 /** 757 ************************************************************************************************************************ 758 * Gfx9Lib::HwlComputeHtileAddrFromCoord 759 * 760 * @brief 761 * Interface function stub of AddrComputeHtileAddrFromCoord 762 * 763 * @return 764 * ADDR_E_RETURNCODE 765 ************************************************************************************************************************ 766 */ 767 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( 768 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 769 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure 770 { 771 ADDR_E_RETURNCODE returnCode = ADDR_OK; 772 773 if (pIn->numMipLevels > 1) 774 { 775 returnCode = ADDR_NOTIMPLEMENTED; 776 } 777 else 778 { 779 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; 780 input.size = sizeof(input); 781 input.hTileFlags = pIn->hTileFlags; 782 input.depthFlags = pIn->depthflags; 783 input.swizzleMode = pIn->swizzleMode; 784 input.unalignedWidth = Max(pIn->unalignedWidth, 1u); 785 input.unalignedHeight = Max(pIn->unalignedHeight, 1u); 786 input.numSlices = Max(pIn->numSlices, 1u); 787 input.numMipLevels = Max(pIn->numMipLevels, 1u); 788 789 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; 790 output.size = sizeof(output); 791 792 returnCode = ComputeHtileInfo(&input, &output); 793 794 if (returnCode == ADDR_OK) 795 { 796 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); 797 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); 798 UINT_32 metaBlkHeightLog2 = 
Log2(output.metaBlkHeight); 799 UINT_32 numSamplesLog2 = Log2(pIn->numSamples); 800 801 const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, 802 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, 803 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); 804 805 UINT_32 xb = pIn->x / output.metaBlkWidth; 806 UINT_32 yb = pIn->y / output.metaBlkHeight; 807 UINT_32 zb = pIn->slice; 808 809 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; 810 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; 811 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; 812 813 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); 814 815 pOut->addr = address >> 1; 816 817 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, 818 pIn->swizzleMode); 819 820 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1)); 821 822 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); 823 } 824 } 825 826 return returnCode; 827 } 828 829 /** 830 ************************************************************************************************************************ 831 * Gfx9Lib::HwlComputeHtileCoordFromAddr 832 * 833 * @brief 834 * Interface function stub of AddrComputeHtileCoordFromAddr 835 * 836 * @return 837 * ADDR_E_RETURNCODE 838 ************************************************************************************************************************ 839 */ 840 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( 841 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure 842 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure 843 { 844 ADDR_E_RETURNCODE returnCode = ADDR_OK; 845 846 if (pIn->numMipLevels > 1) 847 { 848 returnCode = ADDR_NOTIMPLEMENTED; 849 } 850 else 851 { 852 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; 853 input.size = sizeof(input); 854 input.hTileFlags = 
pIn->hTileFlags; 855 input.swizzleMode = pIn->swizzleMode; 856 input.unalignedWidth = Max(pIn->unalignedWidth, 1u); 857 input.unalignedHeight = Max(pIn->unalignedHeight, 1u); 858 input.numSlices = Max(pIn->numSlices, 1u); 859 input.numMipLevels = Max(pIn->numMipLevels, 1u); 860 861 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; 862 output.size = sizeof(output); 863 864 returnCode = ComputeHtileInfo(&input, &output); 865 866 if (returnCode == ADDR_OK) 867 { 868 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); 869 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); 870 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); 871 UINT_32 numSamplesLog2 = Log2(pIn->numSamples); 872 873 const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, 874 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, 875 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); 876 877 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, 878 pIn->swizzleMode); 879 880 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1)); 881 882 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1; 883 884 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; 885 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; 886 887 UINT_32 x, y, z, s, m; 888 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m); 889 890 pOut->slice = m / sliceSizeInBlock; 891 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y; 892 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x; 893 } 894 } 895 896 return returnCode; 897 } 898 899 /** 900 ************************************************************************************************************************ 901 * Gfx9Lib::HwlComputeDccAddrFromCoord 902 * 903 * @brief 904 * Interface function stub of AddrComputeDccAddrFromCoord 905 * 906 * @return 907 * ADDR_E_RETURNCODE 908 
************************************************************************************************************************ 909 */ 910 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( 911 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, 912 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) 913 { 914 ADDR_E_RETURNCODE returnCode = ADDR_OK; 915 916 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear) 917 { 918 returnCode = ADDR_NOTIMPLEMENTED; 919 } 920 else 921 { 922 ADDR2_COMPUTE_DCCINFO_INPUT input = {0}; 923 input.size = sizeof(input); 924 input.dccKeyFlags = pIn->dccKeyFlags; 925 input.colorFlags = pIn->colorFlags; 926 input.swizzleMode = pIn->swizzleMode; 927 input.resourceType = pIn->resourceType; 928 input.bpp = pIn->bpp; 929 input.unalignedWidth = Max(pIn->unalignedWidth, 1u); 930 input.unalignedHeight = Max(pIn->unalignedHeight, 1u); 931 input.numSlices = Max(pIn->numSlices, 1u); 932 input.numFrags = Max(pIn->numFrags, 1u); 933 input.numMipLevels = Max(pIn->numMipLevels, 1u); 934 935 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0}; 936 output.size = sizeof(output); 937 938 returnCode = ComputeDccInfo(&input, &output); 939 940 if (returnCode == ADDR_OK) 941 { 942 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); 943 UINT_32 numSamplesLog2 = Log2(pIn->numFrags); 944 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); 945 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); 946 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth); 947 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth); 948 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); 949 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); 950 951 const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, 952 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, 953 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, 954 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}); 955 956 UINT_32 xb = pIn->x / 
output.metaBlkWidth; 957 UINT_32 yb = pIn->y / output.metaBlkHeight; 958 UINT_32 zb = pIn->slice / output.metaBlkDepth; 959 960 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; 961 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; 962 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; 963 964 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex); 965 966 pOut->addr = address >> 1; 967 968 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned, 969 pIn->swizzleMode); 970 971 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1)); 972 973 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); 974 } 975 } 976 977 return returnCode; 978 } 979 980 /** 981 ************************************************************************************************************************ 982 * Gfx9Lib::HwlInitGlobalParams 983 * 984 * @brief 985 * Initializes global parameters 986 * 987 * @return 988 * TRUE if all settings are valid 989 * 990 ************************************************************************************************************************ 991 */ 992 BOOL_32 Gfx9Lib::HwlInitGlobalParams( 993 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input 994 { 995 BOOL_32 valid = TRUE; 996 997 if (m_settings.isArcticIsland) 998 { 999 GB_ADDR_CONFIG gbAddrConfig; 1000 1001 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig; 1002 1003 // These values are copied from CModel code 1004 switch (gbAddrConfig.bits.NUM_PIPES) 1005 { 1006 case ADDR_CONFIG_1_PIPE: 1007 m_pipes = 1; 1008 m_pipesLog2 = 0; 1009 break; 1010 case ADDR_CONFIG_2_PIPE: 1011 m_pipes = 2; 1012 m_pipesLog2 = 1; 1013 break; 1014 case ADDR_CONFIG_4_PIPE: 1015 m_pipes = 4; 1016 m_pipesLog2 = 2; 1017 break; 1018 case ADDR_CONFIG_8_PIPE: 1019 m_pipes = 8; 1020 m_pipesLog2 = 3; 1021 break; 1022 case ADDR_CONFIG_16_PIPE: 1023 m_pipes = 16; 1024 m_pipesLog2 = 4; 1025 break; 
1026 case ADDR_CONFIG_32_PIPE: 1027 m_pipes = 32; 1028 m_pipesLog2 = 5; 1029 break; 1030 default: 1031 ADDR_ASSERT_ALWAYS(); 1032 break; 1033 } 1034 1035 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE) 1036 { 1037 case ADDR_CONFIG_PIPE_INTERLEAVE_256B: 1038 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; 1039 m_pipeInterleaveLog2 = 8; 1040 break; 1041 case ADDR_CONFIG_PIPE_INTERLEAVE_512B: 1042 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; 1043 m_pipeInterleaveLog2 = 9; 1044 break; 1045 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB: 1046 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB; 1047 m_pipeInterleaveLog2 = 10; 1048 break; 1049 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB: 1050 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB; 1051 m_pipeInterleaveLog2 = 11; 1052 break; 1053 default: 1054 ADDR_ASSERT_ALWAYS(); 1055 break; 1056 } 1057 1058 switch (gbAddrConfig.bits.NUM_BANKS) 1059 { 1060 case ADDR_CONFIG_1_BANK: 1061 m_banks = 1; 1062 m_banksLog2 = 0; 1063 break; 1064 case ADDR_CONFIG_2_BANK: 1065 m_banks = 2; 1066 m_banksLog2 = 1; 1067 break; 1068 case ADDR_CONFIG_4_BANK: 1069 m_banks = 4; 1070 m_banksLog2 = 2; 1071 break; 1072 case ADDR_CONFIG_8_BANK: 1073 m_banks = 8; 1074 m_banksLog2 = 3; 1075 break; 1076 case ADDR_CONFIG_16_BANK: 1077 m_banks = 16; 1078 m_banksLog2 = 4; 1079 break; 1080 default: 1081 ADDR_ASSERT_ALWAYS(); 1082 break; 1083 } 1084 1085 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES) 1086 { 1087 case ADDR_CONFIG_1_SHADER_ENGINE: 1088 m_se = 1; 1089 m_seLog2 = 0; 1090 break; 1091 case ADDR_CONFIG_2_SHADER_ENGINE: 1092 m_se = 2; 1093 m_seLog2 = 1; 1094 break; 1095 case ADDR_CONFIG_4_SHADER_ENGINE: 1096 m_se = 4; 1097 m_seLog2 = 2; 1098 break; 1099 case ADDR_CONFIG_8_SHADER_ENGINE: 1100 m_se = 8; 1101 m_seLog2 = 3; 1102 break; 1103 default: 1104 ADDR_ASSERT_ALWAYS(); 1105 break; 1106 } 1107 1108 switch (gbAddrConfig.bits.NUM_RB_PER_SE) 1109 { 1110 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE: 1111 m_rbPerSe = 1; 1112 m_rbPerSeLog2 = 0; 1113 break; 1114 case 
ADDR_CONFIG_2_RB_PER_SHADER_ENGINE: 1115 m_rbPerSe = 2; 1116 m_rbPerSeLog2 = 1; 1117 break; 1118 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE: 1119 m_rbPerSe = 4; 1120 m_rbPerSeLog2 = 2; 1121 break; 1122 default: 1123 ADDR_ASSERT_ALWAYS(); 1124 break; 1125 } 1126 1127 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS) 1128 { 1129 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS: 1130 m_maxCompFrag = 1; 1131 m_maxCompFragLog2 = 0; 1132 break; 1133 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS: 1134 m_maxCompFrag = 2; 1135 m_maxCompFragLog2 = 1; 1136 break; 1137 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS: 1138 m_maxCompFrag = 4; 1139 m_maxCompFragLog2 = 2; 1140 break; 1141 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS: 1142 m_maxCompFrag = 8; 1143 m_maxCompFragLog2 = 3; 1144 break; 1145 default: 1146 ADDR_ASSERT_ALWAYS(); 1147 break; 1148 } 1149 1150 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2; 1151 ADDR_ASSERT((m_blockVarSizeLog2 == 0) || 1152 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u))); 1153 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u); 1154 } 1155 else 1156 { 1157 valid = FALSE; 1158 ADDR_NOT_IMPLEMENTED(); 1159 } 1160 1161 if (valid) 1162 { 1163 InitEquationTable(); 1164 } 1165 1166 return valid; 1167 } 1168 1169 /** 1170 ************************************************************************************************************************ 1171 * Gfx9Lib::HwlConvertChipFamily 1172 * 1173 * @brief 1174 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision 1175 * @return 1176 * ChipFamily 1177 ************************************************************************************************************************ 1178 */ 1179 ChipFamily Gfx9Lib::HwlConvertChipFamily( 1180 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h 1181 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h 1182 { 1183 ChipFamily family = ADDR_CHIP_FAMILY_AI; 1184 1185 switch (uChipFamily) 1186 
{ 1187 case FAMILY_AI: 1188 m_settings.isArcticIsland = 1; 1189 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision); 1190 1191 m_settings.isDce12 = 1; 1192 1193 if (m_settings.isVega10 == 0) 1194 { 1195 m_settings.htileAlignFix = 1; 1196 m_settings.applyAliasFix = 1; 1197 } 1198 1199 m_settings.metaBaseAlignFix = 1; 1200 1201 m_settings.depthPipeXorDisable = 1; 1202 break; 1203 case FAMILY_RV: 1204 m_settings.isArcticIsland = 1; 1205 m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision); 1206 1207 if (m_settings.isRaven) 1208 { 1209 m_settings.isDcn1 = 1; 1210 } 1211 1212 m_settings.metaBaseAlignFix = 1; 1213 1214 if (ASICREV_IS_RAVEN(uChipRevision)) 1215 { 1216 m_settings.depthPipeXorDisable = 1; 1217 } 1218 break; 1219 1220 default: 1221 ADDR_ASSERT(!"This should be a Fusion"); 1222 break; 1223 } 1224 1225 return family; 1226 } 1227 1228 /** 1229 ************************************************************************************************************************ 1230 * Gfx9Lib::InitRbEquation 1231 * 1232 * @brief 1233 * Init RB equation 1234 * @return 1235 * N/A 1236 ************************************************************************************************************************ 1237 */ 1238 VOID Gfx9Lib::GetRbEquation( 1239 CoordEq* pRbEq, ///< [out] rb equation 1240 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine 1241 UINT_32 numSeLog2) ///< [in] number of shader engine 1242 const 1243 { 1244 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32 1245 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 
5 : 4; 1246 Coordinate cx('x', rbRegion); 1247 Coordinate cy('y', rbRegion); 1248 1249 UINT_32 start = 0; 1250 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2; 1251 1252 // Clear the rb equation 1253 pRbEq->resize(0); 1254 pRbEq->resize(numRbTotalLog2); 1255 1256 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1)) 1257 { 1258 // Special case when more than 1 SE, and 2 RB per SE 1259 (*pRbEq)[0].add(cx); 1260 (*pRbEq)[0].add(cy); 1261 cx++; 1262 cy++; 1263 1264 if (m_settings.applyAliasFix == false) 1265 { 1266 (*pRbEq)[0].add(cy); 1267 } 1268 1269 (*pRbEq)[0].add(cy); 1270 start++; 1271 } 1272 1273 UINT_32 numBits = 2 * (numRbTotalLog2 - start); 1274 1275 for (UINT_32 i = 0; i < numBits; i++) 1276 { 1277 UINT_32 idx = 1278 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i); 1279 1280 if ((i % 2) == 1) 1281 { 1282 (*pRbEq)[idx].add(cx); 1283 cx++; 1284 } 1285 else 1286 { 1287 (*pRbEq)[idx].add(cy); 1288 cy++; 1289 } 1290 } 1291 } 1292 1293 /** 1294 ************************************************************************************************************************ 1295 * Gfx9Lib::GetDataEquation 1296 * 1297 * @brief 1298 * Get data equation for fmask and Z 1299 * @return 1300 * N/A 1301 ************************************************************************************************************************ 1302 */ 1303 VOID Gfx9Lib::GetDataEquation( 1304 CoordEq* pDataEq, ///< [out] data surface equation 1305 Gfx9DataType dataSurfaceType, ///< [in] data surface type 1306 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode 1307 AddrResourceType resourceType, ///< [in] data surface resource type 1308 UINT_32 elementBytesLog2, ///< [in] data surface element bytes 1309 UINT_32 numSamplesLog2) ///< [in] data surface sample count 1310 const 1311 { 1312 Coordinate cx('x', 0); 1313 Coordinate cy('y', 0); 1314 Coordinate cz('z', 0); 1315 Coordinate cs('s', 0); 1316 1317 // Clear the equation 1318 pDataEq->resize(0); 1319 
pDataEq->resize(27); 1320 1321 if (dataSurfaceType == Gfx9DataColor) 1322 { 1323 if (IsLinear(swizzleMode)) 1324 { 1325 Coordinate cm('m', 0); 1326 1327 pDataEq->resize(49); 1328 1329 for (UINT_32 i = 0; i < 49; i++) 1330 { 1331 (*pDataEq)[i].add(cm); 1332 cm++; 1333 } 1334 } 1335 else if (IsThick(resourceType, swizzleMode)) 1336 { 1337 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d 1338 UINT_32 i; 1339 if (IsStandardSwizzle(resourceType, swizzleMode)) 1340 { 1341 // Standard 3d swizzle 1342 // Fill in bottom x bits 1343 for (i = elementBytesLog2; i < 4; i++) 1344 { 1345 (*pDataEq)[i].add(cx); 1346 cx++; 1347 } 1348 // Fill in 2 bits of y and then z 1349 for (i = 4; i < 6; i++) 1350 { 1351 (*pDataEq)[i].add(cy); 1352 cy++; 1353 } 1354 for (i = 6; i < 8; i++) 1355 { 1356 (*pDataEq)[i].add(cz); 1357 cz++; 1358 } 1359 if (elementBytesLog2 < 2) 1360 { 1361 // fill in z & y bit 1362 (*pDataEq)[8].add(cz); 1363 (*pDataEq)[9].add(cy); 1364 cz++; 1365 cy++; 1366 } 1367 else if (elementBytesLog2 == 2) 1368 { 1369 // fill in y and x bit 1370 (*pDataEq)[8].add(cy); 1371 (*pDataEq)[9].add(cx); 1372 cy++; 1373 cx++; 1374 } 1375 else 1376 { 1377 // fill in 2 x bits 1378 (*pDataEq)[8].add(cx); 1379 cx++; 1380 (*pDataEq)[9].add(cx); 1381 cx++; 1382 } 1383 } 1384 else 1385 { 1386 // Z 3d swizzle 1387 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5); 1388 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ? 1389 2 : ((elementBytesLog2 == 1) ? 
3 : 1); 1390 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd); 1391 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++) 1392 { 1393 (*pDataEq)[i].add(cz); 1394 cz++; 1395 } 1396 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3)) 1397 { 1398 // add an x and z 1399 (*pDataEq)[6].add(cx); 1400 (*pDataEq)[7].add(cz); 1401 cx++; 1402 cz++; 1403 } 1404 else if (elementBytesLog2 == 2) 1405 { 1406 // add a y and z 1407 (*pDataEq)[6].add(cy); 1408 (*pDataEq)[7].add(cz); 1409 cy++; 1410 cz++; 1411 } 1412 // add y and x 1413 (*pDataEq)[8].add(cy); 1414 (*pDataEq)[9].add(cx); 1415 cy++; 1416 cx++; 1417 } 1418 // Fill in bit 10 and up 1419 pDataEq->mort3d( cz, cy, cx, 10 ); 1420 } 1421 else if (IsThin(resourceType, swizzleMode)) 1422 { 1423 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); 1424 // Color 2D 1425 UINT_32 microYBits = (8 - elementBytesLog2) / 2; 1426 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2; 1427 UINT_32 i; 1428 // Fill in bottom x bits 1429 for (i = elementBytesLog2; i < 4; i++) 1430 { 1431 (*pDataEq)[i].add(cx); 1432 cx++; 1433 } 1434 // Fill in bottom y bits 1435 for (i = 4; i < 4 + microYBits; i++) 1436 { 1437 (*pDataEq)[i].add(cy); 1438 cy++; 1439 } 1440 // Fill in last of the micro_x bits 1441 for (i = 4 + microYBits; i < 8; i++) 1442 { 1443 (*pDataEq)[i].add(cx); 1444 cx++; 1445 } 1446 // Fill in x/y bits below sample split 1447 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1); 1448 // Fill in sample bits 1449 for (i = 0; i < numSamplesLog2; i++) 1450 { 1451 cs.set('s', i); 1452 (*pDataEq)[tileSplitStart + i].add(cs); 1453 } 1454 // Fill in x/y bits above sample split 1455 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1)) 1456 { 1457 pDataEq->mort2d(cx, cy, blockSizeLog2); 1458 } 1459 else 1460 { 1461 pDataEq->mort2d(cy, cx, blockSizeLog2); 1462 } 1463 } 1464 else 1465 { 1466 ADDR_ASSERT_ALWAYS(); 1467 } 1468 } 1469 else 1470 { 1471 // Fmask or depth 1472 UINT_32 sampleStart = elementBytesLog2; 1473 UINT_32 pixelStart = elementBytesLog2 + 
numSamplesLog2; 1474 UINT_32 ymajStart = 6 + numSamplesLog2; 1475 1476 for (UINT_32 s = 0; s < numSamplesLog2; s++) 1477 { 1478 cs.set('s', s); 1479 (*pDataEq)[sampleStart + s].add(cs); 1480 } 1481 1482 // Put in the x-major order pixel bits 1483 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1); 1484 // Put in the y-major order pixel bits 1485 pDataEq->mort2d(cy, cx, ymajStart); 1486 } 1487 } 1488 1489 /** 1490 ************************************************************************************************************************ 1491 * Gfx9Lib::GetPipeEquation 1492 * 1493 * @brief 1494 * Get pipe equation 1495 * @return 1496 * N/A 1497 ************************************************************************************************************************ 1498 */ 1499 VOID Gfx9Lib::GetPipeEquation( 1500 CoordEq* pPipeEq, ///< [out] pipe equation 1501 CoordEq* pDataEq, ///< [in] data equation 1502 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave 1503 UINT_32 numPipeLog2, ///< [in] number of pipes 1504 UINT_32 numSamplesLog2, ///< [in] data surface sample count 1505 Gfx9DataType dataSurfaceType, ///< [in] data surface type 1506 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode 1507 AddrResourceType resourceType ///< [in] data surface resource type 1508 ) const 1509 { 1510 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); 1511 CoordEq dataEq; 1512 1513 pDataEq->copy(dataEq); 1514 1515 if (dataSurfaceType == Gfx9DataColor) 1516 { 1517 INT_32 shift = static_cast<INT_32>(numSamplesLog2); 1518 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2); 1519 } 1520 1521 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2); 1522 1523 // This section should only apply to z/stencil, maybe fmask 1524 // If the pipe bit is below the comp block size, 1525 // then keep moving up the address until we find a bit that is above 1526 UINT_32 pipeStart = 0; 1527 1528 if (dataSurfaceType != Gfx9DataColor) 1529 { 1530 Coordinate tileMin('x', 3); 1531 1532 while 
(dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin) 1533 { 1534 pipeStart++; 1535 } 1536 1537 // if pipe is 0, then the first pipe bit is above the comp block size, 1538 // so we don't need to do anything 1539 // Note, this if condition is not necessary, since if we execute the loop when pipe==0, 1540 // we will get the same pipe equation 1541 if (pipeStart != 0) 1542 { 1543 for (UINT_32 i = 0; i < numPipeLog2; i++) 1544 { 1545 // Copy the jth bit above pipe interleave to the current pipe equation bit 1546 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]); 1547 } 1548 } 1549 } 1550 1551 if (IsPrt(swizzleMode)) 1552 { 1553 // Clear out bits above the block size if prt's are enabled 1554 dataEq.resize(blockSizeLog2); 1555 dataEq.resize(48); 1556 } 1557 1558 if (IsXor(swizzleMode)) 1559 { 1560 CoordEq xorMask; 1561 1562 if (IsThick(resourceType, swizzleMode)) 1563 { 1564 CoordEq xorMask2; 1565 1566 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2); 1567 1568 xorMask.resize(numPipeLog2); 1569 1570 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) 1571 { 1572 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]); 1573 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]); 1574 } 1575 } 1576 else 1577 { 1578 // Xor in the bits above the pipe+gpu bits 1579 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2); 1580 1581 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE)) 1582 { 1583 Coordinate co; 1584 CoordEq xorMask2; 1585 // if 1xaa and not prt, then xor in the z bits 1586 xorMask2.resize(0); 1587 xorMask2.resize(numPipeLog2); 1588 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) 1589 { 1590 co.set('z', numPipeLog2 - 1 - pipeIdx); 1591 xorMask2[pipeIdx].add(co); 1592 } 1593 1594 pPipeEq->xorin(xorMask2); 1595 } 1596 } 1597 1598 xorMask.reverse(); 1599 pPipeEq->xorin(xorMask); 1600 } 1601 } 1602 /** 1603 
************************************************************************************************************************ 1604 * Gfx9Lib::GetMetaEquation 1605 * 1606 * @brief 1607 * Get meta equation for cmask/htile/DCC 1608 * @return 1609 * Pointer to a calculated meta equation 1610 ************************************************************************************************************************ 1611 */ 1612 const CoordEq* Gfx9Lib::GetMetaEquation( 1613 const MetaEqParams& metaEqParams) 1614 { 1615 UINT_32 cachedMetaEqIndex; 1616 1617 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++) 1618 { 1619 if (memcmp(&metaEqParams, 1620 &m_cachedMetaEqKey[cachedMetaEqIndex], 1621 static_cast<UINT_32>(sizeof(metaEqParams))) == 0) 1622 { 1623 break; 1624 } 1625 } 1626 1627 CoordEq* pMetaEq = NULL; 1628 1629 if (cachedMetaEqIndex < MaxCachedMetaEq) 1630 { 1631 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex]; 1632 } 1633 else 1634 { 1635 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams; 1636 1637 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++]; 1638 1639 m_metaEqOverrideIndex %= MaxCachedMetaEq; 1640 1641 GenMetaEquation(pMetaEq, 1642 metaEqParams.maxMip, 1643 metaEqParams.elementBytesLog2, 1644 metaEqParams.numSamplesLog2, 1645 metaEqParams.metaFlag, 1646 metaEqParams.dataSurfaceType, 1647 metaEqParams.swizzleMode, 1648 metaEqParams.resourceType, 1649 metaEqParams.metaBlkWidthLog2, 1650 metaEqParams.metaBlkHeightLog2, 1651 metaEqParams.metaBlkDepthLog2, 1652 metaEqParams.compBlkWidthLog2, 1653 metaEqParams.compBlkHeightLog2, 1654 metaEqParams.compBlkDepthLog2); 1655 } 1656 1657 return pMetaEq; 1658 } 1659 1660 /** 1661 ************************************************************************************************************************ 1662 * Gfx9Lib::GenMetaEquation 1663 * 1664 * @brief 1665 * Get meta equation for cmask/htile/DCC 1666 * @return 1667 * N/A 1668 
************************************************************************************************************************ 1669 */ 1670 VOID Gfx9Lib::GenMetaEquation( 1671 CoordEq* pMetaEq, ///< [out] meta equation 1672 UINT_32 maxMip, ///< [in] max mip Id 1673 UINT_32 elementBytesLog2, ///< [in] data surface element bytes 1674 UINT_32 numSamplesLog2, ///< [in] data surface sample count 1675 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg 1676 Gfx9DataType dataSurfaceType, ///< [in] data surface type 1677 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode 1678 AddrResourceType resourceType, ///< [in] data surface resource type 1679 UINT_32 metaBlkWidthLog2, ///< [in] meta block width 1680 UINT_32 metaBlkHeightLog2, ///< [in] meta block height 1681 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth 1682 UINT_32 compBlkWidthLog2, ///< [in] compress block width 1683 UINT_32 compBlkHeightLog2, ///< [in] compress block height 1684 UINT_32 compBlkDepthLog2) ///< [in] compress block depth 1685 const 1686 { 1687 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode); 1688 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2; 1689 1690 // Get the correct data address and rb equation 1691 CoordEq dataEq; 1692 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType, 1693 elementBytesLog2, numSamplesLog2); 1694 1695 // Get pipe and rb equations 1696 CoordEq pipeEquation; 1697 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2, 1698 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType); 1699 numPipeTotalLog2 = pipeEquation.getsize(); 1700 1701 if (metaFlag.linear) 1702 { 1703 // Linear metadata supporting was removed for GFX9! No one can use this feature. 
1704 ADDR_ASSERT_ALWAYS(); 1705 1706 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor); 1707 1708 dataEq.copy(*pMetaEq); 1709 1710 if (IsLinear(swizzleMode)) 1711 { 1712 if (metaFlag.pipeAligned) 1713 { 1714 // Remove the pipe bits 1715 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2); 1716 pMetaEq->shift(-shift, pipeInterleaveLog2); 1717 } 1718 // Divide by comp block size, which for linear (which is always color) is 256 B 1719 pMetaEq->shift(-8); 1720 1721 if (metaFlag.pipeAligned) 1722 { 1723 // Put pipe bits back in 1724 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2); 1725 1726 for (UINT_32 i = 0; i < numPipeTotalLog2; i++) 1727 { 1728 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]); 1729 } 1730 } 1731 } 1732 1733 pMetaEq->shift(1); 1734 } 1735 else 1736 { 1737 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2); 1738 UINT_32 compFragLog2 = 1739 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ? 1740 maxCompFragLog2 : numSamplesLog2; 1741 1742 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2; 1743 1744 // Make sure the metaaddr is cleared 1745 pMetaEq->resize(0); 1746 pMetaEq->resize(27); 1747 1748 if (IsThick(resourceType, swizzleMode)) 1749 { 1750 Coordinate cx('x', 0); 1751 Coordinate cy('y', 0); 1752 Coordinate cz('z', 0); 1753 1754 if (maxMip > 0) 1755 { 1756 pMetaEq->mort3d(cy, cx, cz); 1757 } 1758 else 1759 { 1760 pMetaEq->mort3d(cx, cy, cz); 1761 } 1762 } 1763 else 1764 { 1765 Coordinate cx('x', 0); 1766 Coordinate cy('y', 0); 1767 Coordinate cs; 1768 1769 if (maxMip > 0) 1770 { 1771 pMetaEq->mort2d(cy, cx, compFragLog2); 1772 } 1773 else 1774 { 1775 pMetaEq->mort2d(cx, cy, compFragLog2); 1776 } 1777 1778 //------------------------------------------------------------------------------------------------------------------------ 1779 // Put the compressible fragments at the lsb 1780 // the uncompressible frags will be at the msb of the micro address 1781 
//------------------------------------------------------------------------------------------------------------------------ 1782 for (UINT_32 s = 0; s < compFragLog2; s++) 1783 { 1784 cs.set('s', s); 1785 (*pMetaEq)[s].add(cs); 1786 } 1787 } 1788 1789 // Keep a copy of the pipe equations 1790 CoordEq origPipeEquation; 1791 pipeEquation.copy(origPipeEquation); 1792 1793 Coordinate co; 1794 // filter out everything under the compressed block size 1795 co.set('x', compBlkWidthLog2); 1796 pMetaEq->Filter('<', co, 0, 'x'); 1797 co.set('y', compBlkHeightLog2); 1798 pMetaEq->Filter('<', co, 0, 'y'); 1799 co.set('z', compBlkDepthLog2); 1800 pMetaEq->Filter('<', co, 0, 'z'); 1801 1802 // For non-color, filter out sample bits 1803 if (dataSurfaceType != Gfx9DataColor) 1804 { 1805 co.set('x', 0); 1806 pMetaEq->Filter('<', co, 0, 's'); 1807 } 1808 1809 // filter out everything above the metablock size 1810 co.set('x', metaBlkWidthLog2 - 1); 1811 pMetaEq->Filter('>', co, 0, 'x'); 1812 co.set('y', metaBlkHeightLog2 - 1); 1813 pMetaEq->Filter('>', co, 0, 'y'); 1814 co.set('z', metaBlkDepthLog2 - 1); 1815 pMetaEq->Filter('>', co, 0, 'z'); 1816 1817 // filter out everything above the metablock size for the channel bits 1818 co.set('x', metaBlkWidthLog2 - 1); 1819 pipeEquation.Filter('>', co, 0, 'x'); 1820 co.set('y', metaBlkHeightLog2 - 1); 1821 pipeEquation.Filter('>', co, 0, 'y'); 1822 co.set('z', metaBlkDepthLog2 - 1); 1823 pipeEquation.Filter('>', co, 0, 'z'); 1824 1825 // Make sure we still have the same number of channel bits 1826 if (pipeEquation.getsize() != numPipeTotalLog2) 1827 { 1828 ADDR_ASSERT_ALWAYS(); 1829 } 1830 1831 // Loop through all channel and rb bits, 1832 // and make sure these components exist in the metadata address 1833 for (UINT_32 i = 0; i < numPipeTotalLog2; i++) 1834 { 1835 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--) 1836 { 1837 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE) 1838 { 1839 ADDR_ASSERT_ALWAYS(); 1840 } 1841 } 1842 } 1843 
1844 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0; 1845 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0; 1846 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2; 1847 CoordEq origRbEquation; 1848 1849 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2); 1850 1851 CoordEq rbEquation = origRbEquation; 1852 1853 for (UINT_32 i = 0; i < numRbTotalLog2; i++) 1854 { 1855 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--) 1856 { 1857 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE) 1858 { 1859 ADDR_ASSERT_ALWAYS(); 1860 } 1861 } 1862 } 1863 1864 if (m_settings.applyAliasFix) 1865 { 1866 co.set('z', -1); 1867 } 1868 1869 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it 1870 for (UINT_32 i = 0; i < numRbTotalLog2; i++) 1871 { 1872 for (UINT_32 j = 0; j < numPipeTotalLog2; j++) 1873 { 1874 BOOL_32 isRbEquationInPipeEquation = FALSE; 1875 1876 if (m_settings.applyAliasFix) 1877 { 1878 CoordTerm filteredPipeEq; 1879 filteredPipeEq = pipeEquation[j]; 1880 1881 filteredPipeEq.Filter('>', co, 0, 'z'); 1882 1883 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq); 1884 } 1885 else 1886 { 1887 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]); 1888 } 1889 1890 if (isRbEquationInPipeEquation) 1891 { 1892 rbEquation[i].Clear(); 1893 } 1894 } 1895 } 1896 1897 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; 1898 1899 // Loop through each bit of the channel, get the smallest coordinate, 1900 // and remove it from the metaaddr, and rb_equation 1901 for (UINT_32 i = 0; i < numPipeTotalLog2; i++) 1902 { 1903 pipeEquation[i].getsmallest(co); 1904 1905 UINT_32 old_size = pMetaEq->getsize(); 1906 pMetaEq->Filter('=', co); 1907 UINT_32 new_size = pMetaEq->getsize(); 1908 if (new_size != old_size-1) 1909 { 1910 ADDR_ASSERT_ALWAYS(); 1911 } 1912 pipeEquation.remove(co); 1913 for (UINT_32 j = 0; j < numRbTotalLog2; j++) 1914 { 1915 if 
(rbEquation[j].remove(co)) 1916 { 1917 // if we actually removed something from this bit, then add the remaining 1918 // channel bits, as these can be removed for this bit 1919 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++) 1920 { 1921 if (pipeEquation[i][k] != co) 1922 { 1923 rbEquation[j].add(pipeEquation[i][k]); 1924 rbAppendedWithPipeBits[j] = true; 1925 } 1926 } 1927 } 1928 } 1929 } 1930 1931 // Loop through the rb bits and see what remain; 1932 // filter out the smallest coordinate if it remains 1933 UINT_32 rbBitsLeft = 0; 1934 for (UINT_32 i = 0; i < numRbTotalLog2; i++) 1935 { 1936 BOOL_32 isRbEqAppended = FALSE; 1937 1938 if (m_settings.applyAliasFix) 1939 { 1940 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); 1941 } 1942 else 1943 { 1944 isRbEqAppended = (rbEquation[i].getsize() > 0); 1945 } 1946 1947 if (isRbEqAppended) 1948 { 1949 rbBitsLeft++; 1950 rbEquation[i].getsmallest(co); 1951 UINT_32 old_size = pMetaEq->getsize(); 1952 pMetaEq->Filter('=', co); 1953 UINT_32 new_size = pMetaEq->getsize(); 1954 if (new_size != old_size - 1) 1955 { 1956 // assert warning 1957 } 1958 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++) 1959 { 1960 if (rbEquation[j].remove(co)) 1961 { 1962 // if we actually removed something from this bit, then add the remaining 1963 // rb bits, as these can be removed for this bit 1964 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++) 1965 { 1966 if (rbEquation[i][k] != co) 1967 { 1968 rbEquation[j].add(rbEquation[i][k]); 1969 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i]; 1970 } 1971 } 1972 } 1973 } 1974 } 1975 } 1976 1977 // capture the size of the metaaddr 1978 UINT_32 metaSize = pMetaEq->getsize(); 1979 // resize to 49 bits...make this a nibble address 1980 pMetaEq->resize(49); 1981 // Concatenate the macro address above the current address 1982 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++) 1983 { 1984 co.set('m', j); 1985 (*pMetaEq)[i].add(co); 1986 } 1987 1988 // 
Multiply by meta element size (in nibbles) 1989 if (dataSurfaceType == Gfx9DataColor) 1990 { 1991 pMetaEq->shift(1); 1992 } 1993 else if (dataSurfaceType == Gfx9DataDepthStencil) 1994 { 1995 pMetaEq->shift(3); 1996 } 1997 1998 //------------------------------------------------------------------------------------------ 1999 // Note the pipeInterleaveLog2+1 is because address is a nibble address 2000 // Shift up from pipe interleave number of channel 2001 // and rb bits left, and uncompressed fragments 2002 //------------------------------------------------------------------------------------------ 2003 2004 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1); 2005 2006 // Put in the channel bits 2007 for (UINT_32 i = 0; i < numPipeTotalLog2; i++) 2008 { 2009 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]); 2010 } 2011 2012 // Put in remaining rb bits 2013 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2) 2014 { 2015 BOOL_32 isRbEqAppended = FALSE; 2016 2017 if (m_settings.applyAliasFix) 2018 { 2019 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 
1 : 0)); 2020 } 2021 else 2022 { 2023 isRbEqAppended = (rbEquation[i].getsize() > 0); 2024 } 2025 2026 if (isRbEqAppended) 2027 { 2028 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]); 2029 // Mark any rb bit we add in to the rb mask 2030 j++; 2031 } 2032 } 2033 2034 //------------------------------------------------------------------------------------------ 2035 // Put in the uncompressed fragment bits 2036 //------------------------------------------------------------------------------------------ 2037 for (UINT_32 i = 0; i < uncompFragLog2; i++) 2038 { 2039 co.set('s', compFragLog2 + i); 2040 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co); 2041 } 2042 } 2043 } 2044 2045 /** 2046 ************************************************************************************************************************ 2047 * Gfx9Lib::IsEquationSupported 2048 * 2049 * @brief 2050 * Check if equation is supported for given swizzle mode and resource type. 2051 * 2052 * @return 2053 * TRUE if supported 2054 ************************************************************************************************************************ 2055 */ 2056 BOOL_32 Gfx9Lib::IsEquationSupported( 2057 AddrResourceType rsrcType, 2058 AddrSwizzleMode swMode, 2059 UINT_32 elementBytesLog2) const 2060 { 2061 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) && 2062 (IsLinear(swMode) == FALSE) && 2063 (((IsTex2d(rsrcType) == TRUE) && 2064 ((elementBytesLog2 < 4) || 2065 ((IsRotateSwizzle(swMode) == FALSE) && 2066 (IsZOrderSwizzle(swMode) == FALSE)))) || 2067 ((IsTex3d(rsrcType) == TRUE) && 2068 (IsRotateSwizzle(swMode) == FALSE) && 2069 (IsBlock256b(swMode) == FALSE))); 2070 2071 return supported; 2072 } 2073 2074 /** 2075 ************************************************************************************************************************ 2076 * Gfx9Lib::InitEquationTable 2077 * 2078 * @brief 2079 * Initialize Equation table. 
2080 * 2081 * @return 2082 * N/A 2083 ************************************************************************************************************************ 2084 */ 2085 VOID Gfx9Lib::InitEquationTable() 2086 { 2087 memset(m_equationTable, 0, sizeof(m_equationTable)); 2088 2089 // Loop all possible resource type (2D/3D) 2090 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++) 2091 { 2092 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D); 2093 2094 // Loop all possible swizzle mode 2095 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++) 2096 { 2097 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx); 2098 2099 // Loop all possible bpp 2100 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++) 2101 { 2102 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; 2103 2104 // Check if the input is supported 2105 if (IsEquationSupported(rsrcType, swMode, bppIdx)) 2106 { 2107 ADDR_EQUATION equation; 2108 ADDR_E_RETURNCODE retCode; 2109 2110 memset(&equation, 0, sizeof(ADDR_EQUATION)); 2111 2112 // Generate the equation 2113 if (IsBlock256b(swMode) && IsTex2d(rsrcType)) 2114 { 2115 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation); 2116 } 2117 else if (IsThin(rsrcType, swMode)) 2118 { 2119 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation); 2120 } 2121 else 2122 { 2123 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation); 2124 } 2125 2126 // Only fill the equation into the table if the return code is ADDR_OK, 2127 // otherwise if the return code is not ADDR_OK, it indicates this is not 2128 // a valid input, we do nothing but just fill invalid equation index 2129 // into the lookup table. 
2130 if (retCode == ADDR_OK) 2131 { 2132 equationIndex = m_numEquations; 2133 ADDR_ASSERT(equationIndex < EquationTableSize); 2134 2135 m_equationTable[equationIndex] = equation; 2136 2137 m_numEquations++; 2138 } 2139 else 2140 { 2141 ADDR_ASSERT_ALWAYS(); 2142 } 2143 } 2144 2145 // Fill the index into the lookup table, if the combination is not supported 2146 // fill the invalid equation index 2147 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex; 2148 } 2149 } 2150 } 2151 } 2152 2153 /** 2154 ************************************************************************************************************************ 2155 * Gfx9Lib::HwlGetEquationIndex 2156 * 2157 * @brief 2158 * Interface function stub of GetEquationIndex 2159 * 2160 * @return 2161 * ADDR_E_RETURNCODE 2162 ************************************************************************************************************************ 2163 */ 2164 UINT_32 Gfx9Lib::HwlGetEquationIndex( 2165 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, 2166 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut 2167 ) const 2168 { 2169 AddrResourceType rsrcType = pIn->resourceType; 2170 AddrSwizzleMode swMode = pIn->swizzleMode; 2171 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); 2172 UINT_32 index = ADDR_INVALID_EQUATION_INDEX; 2173 2174 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2)) 2175 { 2176 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1; 2177 UINT_32 swModeIdx = static_cast<UINT_32>(swMode); 2178 2179 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2]; 2180 } 2181 2182 if (pOut->pMipInfo != NULL) 2183 { 2184 for (UINT_32 i = 0; i < pIn->numMipLevels; i++) 2185 { 2186 pOut->pMipInfo[i].equationIndex = index; 2187 } 2188 } 2189 2190 return index; 2191 } 2192 2193 /** 2194 ************************************************************************************************************************ 2195 * Gfx9Lib::HwlComputeBlock256Equation 2196 * 2197 * @brief 2198 * Interface 
function stub of ComputeBlock256Equation
*
*       Fills the low 8 address bits of pEquation (numBits is set to 8): the first
*       elementBytesLog2 bits come from InitChannel(1, 0, i, ...), the remaining bits are
*       taken from the per-swizzle-type x/y tables below.
*
*   @return
*       ADDR_OK on success, ADDR_INVALIDPARAMS for an unsupported swizzle type / element size
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
    AddrResourceType rsrcType,
    AddrSwizzleMode  swMode,
    UINT_32          elementBytesLog2,
    ADDR_EQUATION*   pEquation) const
{
    ADDR_E_RETURNCODE ret = ADDR_OK;

    pEquation->numBits = 8;

    // Address bits below the element size
    UINT_32 i = 0;
    for (; i < elementBytesLog2; i++)
    {
        InitChannel(1, 0 , i, &pEquation->addr[i]);
    }

    // pixelBit[n] aliases addr[elementBytesLog2 + n]
    ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];

    const UINT_32 maxBitsUsed = 4;
    ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
    ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};

    // Pre-build the candidate channel settings; x[] uses channel 0 with its index offset by
    // elementBytesLog2, y[] uses channel 1.
    for (i = 0; i < maxBitsUsed; i++)
    {
        InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
        InitChannel(1, 1, i, &y[i]);
    }

    if (IsStandardSwizzle(rsrcType, swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = x[3];
                pixelBit[4] = y[0];
                pixelBit[5] = y[1];
                pixelBit[6] = y[2];
                pixelBit[7] = y[3];
                break;
            case 1:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[0];
                pixelBit[4] = y[1];
                pixelBit[5] = y[2];
                pixelBit[6] = x[3];
                break;
            case 2:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = y[0];
                pixelBit[3] = y[1];
                pixelBit[4] = y[2];
                pixelBit[5] = x[2];
                break;
            case 3:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = y[1];
                pixelBit[3] = x[1];
                pixelBit[4] = x[2];
                break;
            case 4:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = x[0];
                pixelBit[3] = x[1];
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else if (IsDisplaySwizzle(rsrcType, swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[1];
                pixelBit[4] = y[0];
                pixelBit[5] = y[2];
                pixelBit[6] = x[3];
                pixelBit[7] = y[3];
                break;
            case 1:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[0];
                pixelBit[4] = y[1];
                pixelBit[5] = y[2];
                pixelBit[6] = x[3];
                break;
            case 2:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = y[0];
                pixelBit[3] = x[2];
                pixelBit[4] = y[1];
                pixelBit[5] = y[2];
                break;
            case 3:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = x[1];
                pixelBit[3] = x[2];
                pixelBit[4] = y[1];
                break;
            case 4:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = x[1];
                pixelBit[3] = y[1];
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else if (IsRotateSwizzle(swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = y[2];
                pixelBit[3] = x[1];
                pixelBit[4] = x[0];
                pixelBit[5] = x[2];
                pixelBit[6] = x[3];
                pixelBit[7] = y[3];
                break;
            case 1:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = y[2];
                pixelBit[3] = x[0];
                pixelBit[4] = x[1];
                pixelBit[5] = x[2];
                pixelBit[6] = x[3];
                break;
            case 2:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = x[0];
                pixelBit[3] = y[2];
                pixelBit[4] = x[1];
                pixelBit[5] = x[2];
                break;
            case 3:
                pixelBit[0] = y[0];
                pixelBit[1] = x[0];
                pixelBit[2] = y[1];
                pixelBit[3] = x[1];
                pixelBit[4] = x[2];
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                // NOTE(review): no break here, so control falls through into case 4 and the
                // error code is shared; case 4 itself returns the error without asserting.
                // Presumably intentional - confirm.
            case 4:
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else
    {
        ADDR_ASSERT_ALWAYS();
        ret = ADDR_INVALIDPARAMS;
    }

    // Post validation
    // Cross-check the highest channel index used for channels 0 and 1 against the
    // Block256_2d entry for this element size.
    if (ret == ADDR_OK)
    {
        Dim2d microBlockDim = Block256_2d[elementBytesLog2];
        ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
                    (microBlockDim.w * (1 << elementBytesLog2)));
        ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeThinEquation
*
*   @brief
*       Interface function stub of ComputeThinEquation
*
*       Builds the address equation in three steps: the low bits (6 bits generated inline for
*       Z-order swizzle, otherwise 8 bits from HwlComputeBlock256Equation), the remaining bits
*       up to the block size alternating y/x, and finally the xor1/xor2 terms for XOR modes.
*
*   @return
*       ADDR_OK on success, ADDR_INVALIDPARAMS for an unsupported input
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
    AddrResourceType rsrcType,
    AddrSwizzleMode  swMode,
    UINT_32          elementBytesLog2,
    ADDR_EQUATION*   pEquation) const
{
    ADDR_E_RETURNCODE ret = ADDR_OK;

    UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);

    UINT_32 maxXorBits = blockSizeLog2;
    if (IsNonPrtXor(swMode))
    {
        // For non-prt-xor, maybe need to initialize some more bits for xor
        // The highest xor bit used in equation will be max the following 3 items:
        // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
        // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
        // 3. blockSizeLog2

        maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
        maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
                                     GetPipeXorBits(blockSizeLog2) +
                                     2 * GetBankXorBits(blockSizeLog2));
    }

    const UINT_32 maxBitsUsed = 14;
    ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
    ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
    ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};

    // Scratch storage for source bits at positions >= blockSizeLog2; these are only
    // referenced as XOR sources and are not written into pEquation->addr.
    const UINT_32 extraXorBits = 16;
    ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
    ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};

    for (UINT_32 i = 0; i < maxBitsUsed; i++)
    {
        InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
        InitChannel(1, 1, i, &y[i]);
    }

    // Unlike HwlComputeBlock256Equation, pixelBit here is indexed by absolute address bit.
    ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;

    for (UINT_32 i = 0; i < elementBytesLog2; i++)
    {
        InitChannel(1, 0 , i, &pixelBit[i]);
    }

    // Next unused entries of x[] / y[], and the number of low address bits already filled
    UINT_32 xIdx = 0;
    UINT_32 yIdx = 0;
    UINT_32 lowBits = 0;

    if (IsZOrderSwizzle(swMode))
    {
        if (elementBytesLog2 <= 3)
        {
            // Interleave x and y, starting with x, up to bit 5
            for (UINT_32 i = elementBytesLog2; i < 6; i++)
            {
                pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
            }

            lowBits = 6;
        }
        else
        {
            ret = ADDR_INVALIDPARAMS;
        }
    }
    else
    {
        ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);

        if (ret == ADDR_OK)
        {
            // Continue after the x/y bits covered by the 256B micro block dimensions
            Dim2d microBlockDim = Block256_2d[elementBytesLog2];
            xIdx = Log2(microBlockDim.w);
            yIdx = Log2(microBlockDim.h);
            lowBits = 8;
        }
    }

    if (ret == ADDR_OK)
    {
        // Remaining in-block bits: even bit positions take y, odd positions take x
        for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
        {
            pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
        }

        // Same alternation continued past the block size, stored in xorExtra
        for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
        {
            xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
        }

        if (IsXor(swMode))
        {
            // Fill XOR bits
            UINT_32 pipeStart = m_pipeInterleaveLog2;
            UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);

            UINT_32 bankStart = pipeStart + pipeXorBits;
            UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);

            // xor1 source positions are walked in descending order as i increases
            for (UINT_32 i = 0; i < pipeXorBits; i++)
            {
                UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
                ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
            }

            for (UINT_32 i = 0; i < bankXorBits; i++)
            {
                UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
                ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
            }

            // Non-PRT modes additionally get xor2 terms built from channel 2
            if (IsPrt(swMode) == FALSE)
            {
                for (UINT_32 i = 0; i < pipeXorBits; i++)
                {
                    InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
                }

                for (UINT_32 i = 0; i < bankXorBits; i++)
                {
                    InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
                }
            }
        }

        pEquation->numBits = blockSizeLog2;
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeThickEquation
*
*   @brief
*       Interface function stub of ComputeThickEquation
*
*       Builds the address equation for 3D resources: the low 10 bits come from the
*       per-swizzle-type x/y/z tables below, the remaining bits up to the block size cycle
*       through x/z/y by (bit position % 3), and XOR modes get xor1/xor2 terms on top.
*
*   @return
*       ADDR_OK on success, ADDR_INVALIDPARAMS for an unsupported swizzle type / element size
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
    AddrResourceType rsrcType,
    AddrSwizzleMode  swMode,
    UINT_32          elementBytesLog2,
    ADDR_EQUATION*   pEquation) const
{
    ADDR_E_RETURNCODE ret = ADDR_OK;

    ADDR_ASSERT(IsTex3d(rsrcType));

    UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);

    UINT_32 maxXorBits = blockSizeLog2;
    if (IsNonPrtXor(swMode))
    {
        // For non-prt-xor, maybe need to initialize some more bits for xor
        // The highest xor bit used in equation will be max the following 3:
        // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
        // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
        // 3. blockSizeLog2

        maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
        maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
                                     GetPipeXorBits(blockSizeLog2) +
                                     3 * GetBankXorBits(blockSizeLog2));
    }

    // Address bits below the element size
    for (UINT_32 i = 0; i < elementBytesLog2; i++)
    {
        InitChannel(1, 0 , i, &pEquation->addr[i]);
    }

    // pixelBit[n] aliases addr[elementBytesLog2 + n] while the low-bit tables are applied
    ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];

    const UINT_32 maxBitsUsed = 12;
    ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
    ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
    ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
    ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};

    // Scratch storage for source bits at positions >= blockSizeLog2 (XOR sources only)
    const UINT_32 extraXorBits = 24;
    ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
    ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};

    // x[] uses channel 0 (index offset by elementBytesLog2), y[] channel 1, z[] channel 2
    for (UINT_32 i = 0; i < maxBitsUsed; i++)
    {
        InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
        InitChannel(1, 1, i, &y[i]);
        InitChannel(1, 2, i, &z[i]);
    }

    if (IsZOrderSwizzle(swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = x[1];
                pixelBit[3] = y[1];
                pixelBit[4] = z[0];
                pixelBit[5] = z[1];
                pixelBit[6] = x[2];
                pixelBit[7] = z[2];
                pixelBit[8] = y[2];
                pixelBit[9] = x[3];
                break;
            case 1:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = x[1];
                pixelBit[3] = y[1];
                pixelBit[4] = z[0];
                pixelBit[5] = z[1];
                pixelBit[6] = z[2];
                pixelBit[7] = y[2];
                pixelBit[8] = x[2];
                break;
            case 2:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = x[1];
                pixelBit[3] = z[0];
                pixelBit[4] = y[1];
                pixelBit[5] = z[1];
                pixelBit[6] = y[2];
                pixelBit[7] = x[2];
                break;
            case 3:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = z[0];
                pixelBit[3] = x[1];
                pixelBit[4] = z[1];
                pixelBit[5] = y[1];
                pixelBit[6] = x[2];
                break;
            case 4:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = z[0];
                pixelBit[3] = z[1];
                pixelBit[4] = y[1];
                pixelBit[5] = x[1];
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else if (IsStandardSwizzle(rsrcType, swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = x[3];
                pixelBit[4] = y[0];
                pixelBit[5] = y[1];
                pixelBit[6] = z[0];
                pixelBit[7] = z[1];
                pixelBit[8] = z[2];
                pixelBit[9] = y[2];
                break;
            case 1:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[0];
                pixelBit[4] = y[1];
                pixelBit[5] = z[0];
                pixelBit[6] = z[1];
                pixelBit[7] = z[2];
                pixelBit[8] = y[2];
                break;
            case 2:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = y[0];
                pixelBit[3] = y[1];
                pixelBit[4] = z[0];
                pixelBit[5] = z[1];
                pixelBit[6] = y[2];
                pixelBit[7] = x[2];
                break;
            case 3:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = y[1];
                pixelBit[3] = z[0];
                pixelBit[4] = z[1];
                pixelBit[5] = x[1];
                pixelBit[6] = x[2];
                break;
            case 4:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = z[0];
                pixelBit[3] = z[1];
                pixelBit[4] = x[0];
                pixelBit[5] = x[1];
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else
    {
        ADDR_ASSERT_ALWAYS();
        ret = ADDR_INVALIDPARAMS;
    }

    if (ret == ADDR_OK)
    {
        // Continue after the x/y/z bits covered by the Block1K_3d micro block dimensions
        Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
        UINT_32 xIdx = Log2(microBlockDim.w);
        UINT_32 yIdx = Log2(microBlockDim.h);
        UINT_32 zIdx = Log2(microBlockDim.d);

        // From here on pixelBit is indexed by absolute address bit
        pixelBit = pEquation->addr;

        // The tables above must have filled exactly address bits [0, lowBits)
        const UINT_32 lowBits = 10;
        ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
        ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);

        // Remaining in-block bits: position % 3 selects x (0), z (1) or y (2)
        for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
        {
            if ((i % 3) == 0)
            {
                pixelBit[i] = x[xIdx++];
            }
            else if ((i % 3) == 1)
            {
                pixelBit[i] = z[zIdx++];
            }
            else
            {
                pixelBit[i] = y[yIdx++];
            }
        }

        // Same cycle continued past the block size, stored in xorExtra
        for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
        {
            if ((i % 3) == 0)
            {
                xorExtra[i - blockSizeLog2] = x[xIdx++];
            }
            else if ((i % 3) == 1)
            {
                xorExtra[i - blockSizeLog2] = z[zIdx++];
            }
            else
            {
                xorExtra[i - blockSizeLog2] = y[yIdx++];
            }
        }

        if (IsXor(swMode))
        {
            // Fill XOR bits
            // Each pipe/bank bit gets two sources (xor1 and xor2) taken from adjacent
            // positions, stepping down by 2 per iteration.
            UINT_32 pipeStart = m_pipeInterleaveLog2;
            UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
            for (UINT_32 i = 0; i < pipeXorBits; i++)
            {
                UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
                ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);

                UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
                ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
            }

            UINT_32 bankStart = pipeStart + pipeXorBits;
            UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
            for (UINT_32 i = 0; i < bankXorBits; i++)
            {
                UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
                ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);

                UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
                ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
            }
        }

        pEquation->numBits = blockSizeLog2;
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx9Lib::IsValidDisplaySwizzleMode
*
*   @brief
*       Check if a swizzle mode is supported by display engine
*
*       The accepted swizzle modes and bpp limits depend on m_settings.isDce12 /
*       m_settings.isDcn1; any other configuration hits ADDR_NOT_IMPLEMENTED().
*
*   @return
*       TRUE if swizzle mode is supported by display engine
************************************************************************************************************************
*/
BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    BOOL_32 support = FALSE;

    // resourceType is read but not used by any check below; the (void) cast keeps it
    // from being reported as unused.
    const AddrResourceType resourceType = pIn->resourceType;
    (void)resourceType;
    const AddrSwizzleMode swizzleMode = pIn->swizzleMode;

    if (m_settings.isDce12)
    {
        switch (swizzleMode)
        {
            // 256B display/rotate modes: 32 bpp only
            case ADDR_SW_256B_D:
            case ADDR_SW_256B_R:
                support = (pIn->bpp == 32);
                break;

            // Linear and larger-block display/rotate modes: up to 64 bpp
            case ADDR_SW_LINEAR:
            case ADDR_SW_4KB_D:
            case ADDR_SW_4KB_R:
            case ADDR_SW_64KB_D:
            case ADDR_SW_64KB_R:
            case ADDR_SW_VAR_D:
            case ADDR_SW_VAR_R:
            case ADDR_SW_4KB_D_X:
            case ADDR_SW_4KB_R_X:
            case ADDR_SW_64KB_D_X:
            case ADDR_SW_64KB_R_X:
            case ADDR_SW_VAR_D_X:
            case ADDR_SW_VAR_R_X:
                support = (pIn->bpp <= 64);
                break;

            default:
                break;
        }
    }
    else if (m_settings.isDcn1)
    {
        switch (swizzleMode)
        {
            // Display (_D) modes: 64 bpp only
            case ADDR_SW_4KB_D:
            case ADDR_SW_64KB_D:
            case ADDR_SW_VAR_D:
            case ADDR_SW_64KB_D_T:
            case ADDR_SW_4KB_D_X:
            case ADDR_SW_64KB_D_X:
            case ADDR_SW_VAR_D_X:
                support = (pIn->bpp == 64);
                break;

            // Linear and standard (_S) modes: up to 64 bpp
            case ADDR_SW_LINEAR:
            case ADDR_SW_4KB_S:
            case ADDR_SW_64KB_S:
            case ADDR_SW_VAR_S:
            case ADDR_SW_64KB_S_T:
            case ADDR_SW_4KB_S_X:
            case ADDR_SW_64KB_S_X:
            case ADDR_SW_VAR_S_X:
                support = (pIn->bpp <= 64);
                break;

            default:
                break;
        }
    }
    else
    {
        ADDR_NOT_IMPLEMENTED();
    }

    return support;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputePipeBankXor
*
*   @brief
*       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
*
*       The value is written to pOut->pipeBankXor. Only the bank part is derived from
*       pIn->surfIndex; the pipe part is always 0 here.
*
*   @return
*       ADDR_E_RETURNCODE (always ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
    const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
{
    UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
    UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
    UINT_32 bankBits       = GetBankXorBits(macroBlockBits);

    UINT_32 pipeXor = 0;
    UINT_32 bankXor = 0;

    // Reduce the surface index to the available bank bits
    const UINT_32 bankMask = (1 << bankBits) - 1;
    const UINT_32 index    = pIn->surfIndex & bankMask;

    // bpp is only consulted in the bankBits == 4 branch below
    const UINT_32 bpp      = pIn->flags.fmask ?
                             GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
    if (bankBits == 4)
    {
        // Fixed 16-entry permutations, selected by bpp (<= 32 vs. larger)
        static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
        static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};

        bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
    }
    else if (bankBits > 0)
    {
        // Multiply the index by (2^(bankBits - 1) - 1), clamped to at least 1, within the mask
        UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
        bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
        bankXor = (index * bankIncrease) & bankMask;
    }

    // Bank bits sit above the pipe bits
    pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeSlicePipeBankXor
*
*   @brief
*       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
*
*       The value is written to pOut->pipeBankXor.
*
*   @return
*       ADDR_E_RETURNCODE (always ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
    const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
    ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
{
    UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
    UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
    UINT_32 bankBits       = GetBankXorBits(macroBlockBits);

    // The low pipeBits of the slice id feed the pipe part, the next bankBits feed the bank
    // part; each is passed through ReverseBitVector.
    UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
    UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);

    pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));

    return ADDR_OK;
}

/**
************************************************************************************************************************ 2983 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern 2984 * 2985 * @brief 2986 * Compute sub resource offset to support swizzle pattern 2987 * 2988 * @return 2989 * Offset 2990 ************************************************************************************************************************ 2991 */ 2992 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern( 2993 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, 2994 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const 2995 { 2996 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); 2997 2998 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); 2999 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); 3000 UINT_32 bankBits = GetBankXorBits(macroBlockBits); 3001 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); 3002 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits); 3003 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2; 3004 3005 pOut->offset = pIn->slice * pIn->sliceSize + 3006 pIn->macroBlockOffset + 3007 (pIn->mipTailOffset ^ pipeBankXor) - 3008 static_cast<UINT_64>(pipeBankXor); 3009 return ADDR_OK; 3010 } 3011 3012 /** 3013 ************************************************************************************************************************ 3014 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck 3015 * 3016 * @brief 3017 * Compute surface info sanity check 3018 * 3019 * @return 3020 * Offset 3021 ************************************************************************************************************************ 3022 */ 3023 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck( 3024 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const 3025 { 3026 BOOL_32 invalid = FALSE; 3027 3028 if ((pIn->bpp > 128) || (pIn->width == 0) || 
(pIn->numFrags > 8) || (pIn->numSamples > 16)) 3029 { 3030 invalid = TRUE; 3031 } 3032 else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || 3033 (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)) 3034 { 3035 invalid = TRUE; 3036 } 3037 3038 BOOL_32 mipmap = (pIn->numMipLevels > 1); 3039 BOOL_32 msaa = (pIn->numFrags > 1); 3040 3041 ADDR2_SURFACE_FLAGS flags = pIn->flags; 3042 BOOL_32 zbuffer = (flags.depth || flags.stencil); 3043 BOOL_32 color = flags.color; 3044 BOOL_32 display = flags.display || flags.rotated; 3045 3046 AddrResourceType rsrcType = pIn->resourceType; 3047 BOOL_32 tex3d = IsTex3d(rsrcType); 3048 AddrSwizzleMode swizzle = pIn->swizzleMode; 3049 BOOL_32 linear = IsLinear(swizzle); 3050 BOOL_32 blk256B = IsBlock256b(swizzle); 3051 BOOL_32 blkVar = IsBlockVariable(swizzle); 3052 BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); 3053 BOOL_32 prt = flags.prt; 3054 BOOL_32 stereo = flags.qbStereo; 3055 3056 if (invalid == FALSE) 3057 { 3058 if ((pIn->numFrags > 1) && 3059 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags))) 3060 { 3061 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples 3062 invalid = TRUE; 3063 } 3064 } 3065 3066 if (invalid == FALSE) 3067 { 3068 switch (rsrcType) 3069 { 3070 case ADDR_RSRC_TEX_1D: 3071 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo; 3072 break; 3073 case ADDR_RSRC_TEX_2D: 3074 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap); 3075 break; 3076 case ADDR_RSRC_TEX_3D: 3077 invalid = msaa || zbuffer || display || stereo; 3078 break; 3079 default: 3080 invalid = TRUE; 3081 break; 3082 } 3083 } 3084 3085 if (invalid == FALSE) 3086 { 3087 if (display) 3088 { 3089 invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE); 3090 } 3091 } 3092 3093 if (invalid == FALSE) 3094 { 3095 if (linear) 3096 { 3097 invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) || 3098 zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0); 3099 } 3100 else 3101 { 3102 if (blk256B || blkVar || 
isNonPrtXor) 3103 { 3104 invalid = prt; 3105 if (blk256B) 3106 { 3107 invalid = invalid || zbuffer || tex3d || mipmap || msaa; 3108 } 3109 } 3110 3111 if (invalid == FALSE) 3112 { 3113 if (IsZOrderSwizzle(swizzle)) 3114 { 3115 invalid = color && msaa; 3116 } 3117 else if (IsStandardSwizzle(rsrcType, swizzle)) 3118 { 3119 invalid = zbuffer; 3120 } 3121 else if (IsDisplaySwizzle(rsrcType, swizzle)) 3122 { 3123 invalid = zbuffer; 3124 } 3125 else if (IsRotateSwizzle(swizzle)) 3126 { 3127 invalid = zbuffer || (pIn->bpp > 64) || tex3d; 3128 } 3129 else 3130 { 3131 ADDR_ASSERT(!"invalid swizzle mode"); 3132 invalid = TRUE; 3133 } 3134 } 3135 } 3136 } 3137 3138 ADDR_ASSERT(invalid == FALSE); 3139 3140 return invalid ? ADDR_INVALIDPARAMS : ADDR_OK; 3141 } 3142 3143 /** 3144 ************************************************************************************************************************ 3145 * Gfx9Lib::HwlGetPreferredSurfaceSetting 3146 * 3147 * @brief 3148 * Internal function to get suggested surface information for cliet to use 3149 * 3150 * @return 3151 * ADDR_E_RETURNCODE 3152 ************************************************************************************************************************ 3153 */ 3154 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( 3155 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, 3156 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const 3157 { 3158 // Macro define resource block type 3159 enum AddrBlockType 3160 { 3161 AddrBlockMicro = 0, // Resource uses 256B block 3162 AddrBlock4KB = 1, // Resource uses 4KB block 3163 AddrBlock64KB = 2, // Resource uses 64KB block 3164 AddrBlockVar = 3, // Resource uses var blcok 3165 AddrBlockLinear = 4, // Resource uses linear swizzle mode 3166 3167 AddrBlockMaxTiledType = AddrBlock64KB + 1, 3168 }; 3169 3170 enum AddrBlockSet 3171 { 3172 AddrBlockSetMicro = 1 << AddrBlockMicro, 3173 AddrBlockSetMacro4KB = 1 << AddrBlock4KB, 3174 AddrBlockSetMacro64KB = 1 << AddrBlock64KB, 3175 
AddrBlockSetVar = 1 << AddrBlockVar, 3176 AddrBlockSetLinear = 1 << AddrBlockLinear, 3177 3178 AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB, 3179 }; 3180 3181 enum AddrSwSet 3182 { 3183 AddrSwSetZ = 1 << ADDR_SW_Z, 3184 AddrSwSetS = 1 << ADDR_SW_S, 3185 AddrSwSetD = 1 << ADDR_SW_D, 3186 AddrSwSetR = 1 << ADDR_SW_R, 3187 3188 AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR, 3189 }; 3190 3191 ADDR_E_RETURNCODE returnCode = ADDR_OK; 3192 ElemLib* pElemLib = GetElemLib(); 3193 3194 // Set format to INVALID will skip this conversion 3195 UINT_32 expandX = 1; 3196 UINT_32 expandY = 1; 3197 UINT_32 bpp = pIn->bpp; 3198 UINT_32 width = pIn->width; 3199 UINT_32 height = pIn->height; 3200 3201 if (pIn->format != ADDR_FMT_INVALID) 3202 { 3203 // Don't care for this case 3204 ElemMode elemMode = ADDR_UNCOMPRESSED; 3205 3206 // Get compression/expansion factors and element mode which indicates compression/expansion 3207 bpp = pElemLib->GetBitsPerPixel(pIn->format, 3208 &elemMode, 3209 &expandX, 3210 &expandY); 3211 3212 UINT_32 basePitch = 0; 3213 GetElemLib()->AdjustSurfaceInfo(elemMode, 3214 expandX, 3215 expandY, 3216 &bpp, 3217 &basePitch, 3218 &width, 3219 &height); 3220 } 3221 3222 UINT_32 numSamples = Max(pIn->numSamples, 1u); 3223 UINT_32 numFrags = (pIn->numFrags == 0) ? 
numSamples : pIn->numFrags; 3224 UINT_32 slice = Max(pIn->numSlices, 1u); 3225 UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u); 3226 UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign); 3227 3228 if (pIn->flags.fmask) 3229 { 3230 bpp = GetFmaskBpp(numSamples, numFrags); 3231 numFrags = 1; 3232 numSamples = 1; 3233 pOut->resourceType = ADDR_RSRC_TEX_2D; 3234 } 3235 else 3236 { 3237 // The output may get changed for volume(3D) texture resource in future 3238 pOut->resourceType = pIn->resourceType; 3239 } 3240 3241 if (bpp < 8) 3242 { 3243 ADDR_ASSERT_ALWAYS(); 3244 3245 returnCode = ADDR_INVALIDPARAMS; 3246 } 3247 else if (IsTex1d(pOut->resourceType)) 3248 { 3249 pOut->swizzleMode = ADDR_SW_LINEAR; 3250 pOut->validBlockSet.value = AddrBlockSetLinear; 3251 pOut->canXor = FALSE; 3252 } 3253 else 3254 { 3255 ADDR2_BLOCK_SET blockSet; 3256 blockSet.value = 0; 3257 3258 ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet; 3259 addrPreferredSwSet.value = AddrSwSetS; 3260 addrValidSwSet = addrPreferredSwSet; 3261 clientPreferredSwSet = pIn->preferredSwSet; 3262 3263 if (clientPreferredSwSet.value == 0) 3264 { 3265 clientPreferredSwSet.value = AddrSwSetAll; 3266 } 3267 3268 // prt Xor and non-xor will have less height align requirement for stereo surface 3269 BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE); 3270 BOOL_32 displayResource = FALSE; 3271 3272 pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE); 3273 3274 // Filter out improper swType and blockSet by HW restriction 3275 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil) 3276 { 3277 ADDR_ASSERT(IsTex2d(pOut->resourceType)); 3278 blockSet.value = AddrBlockSetMacro; 3279 addrPreferredSwSet.value = AddrSwSetZ; 3280 addrValidSwSet.value = AddrSwSetZ; 3281 3282 if (pIn->flags.depth && pIn->flags.texture) 3283 { 3284 if (((bpp == 16) && (numFrags >= 4)) || 3285 ((bpp == 32) && (numFrags >= 2))) 3286 { 3287 // When _X/_T swizzle mode was 
used for MSAA depth texture, TC will get zplane 3288 // equation from wrong address within memory range a tile covered and use the 3289 // garbage data for compressed Z reading which finally leads to corruption. 3290 pOut->canXor = FALSE; 3291 prtXor = FALSE; 3292 } 3293 } 3294 } 3295 else if (ElemLib::IsBlockCompressed(pIn->format)) 3296 { 3297 // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. 3298 // Not sure under what circumstances "_D" would be appropriate as these formats 3299 // are not displayable. 3300 blockSet.value = AddrBlockSetMacro; 3301 3302 // This isn't to be used as texture and caller doesn't allow macro tiled. 3303 if ((pIn->flags.texture == FALSE) && 3304 (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB)) 3305 { 3306 blockSet.value |= AddrBlockSetLinear; 3307 } 3308 3309 addrPreferredSwSet.value = AddrSwSetD; 3310 addrValidSwSet.value = AddrSwSetS | AddrSwSetD; 3311 } 3312 else if (ElemLib::IsMacroPixelPacked(pIn->format)) 3313 { 3314 // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. 3315 // Its notclear under what circumstances the D or R modes would be appropriate 3316 // since these formats are not displayable. 
3317 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; 3318 3319 addrPreferredSwSet.value = AddrSwSetS; 3320 addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR; 3321 } 3322 else if (IsTex3d(pOut->resourceType)) 3323 { 3324 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; 3325 3326 if (pIn->flags.prt) 3327 { 3328 // PRT cannot use SW_D which gives an unexpected block dimension 3329 addrPreferredSwSet.value = AddrSwSetZ; 3330 addrValidSwSet.value = AddrSwSetZ | AddrSwSetS; 3331 } 3332 else if ((numMipLevels > 1) && (slice >= width) && (slice >= height)) 3333 { 3334 // When depth (Z) is the maximum dimension then must use one of the SW_*_S 3335 // or SW_*_Z modes if mipmapping is desired on a 3D surface 3336 addrPreferredSwSet.value = AddrSwSetZ; 3337 addrValidSwSet.value = AddrSwSetZ | AddrSwSetS; 3338 } 3339 else if (pIn->flags.color) 3340 { 3341 addrPreferredSwSet.value = AddrSwSetD; 3342 addrValidSwSet.value = AddrSwSetZ | AddrSwSetS | AddrSwSetD; 3343 } 3344 else 3345 { 3346 addrPreferredSwSet.value = AddrSwSetZ; 3347 addrValidSwSet.value = AddrSwSetZ | AddrSwSetD; 3348 if (bpp != 128) 3349 { 3350 addrValidSwSet.value |= AddrSwSetS; 3351 } 3352 } 3353 } 3354 else 3355 { 3356 addrPreferredSwSet.value = ((pIn->flags.display == TRUE) || 3357 (pIn->flags.overlay == TRUE) || 3358 (pIn->bpp == 128)) ? 
AddrSwSetD : AddrSwSetS; 3359 3360 addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR; 3361 3362 if (numMipLevels > 1) 3363 { 3364 ADDR_ASSERT(numFrags == 1); 3365 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; 3366 } 3367 else if ((numFrags > 1) || (numSamples > 1)) 3368 { 3369 ADDR_ASSERT(IsTex2d(pOut->resourceType)); 3370 blockSet.value = AddrBlockSetMacro; 3371 } 3372 else 3373 { 3374 ADDR_ASSERT(IsTex2d(pOut->resourceType)); 3375 blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro; 3376 3377 displayResource = pIn->flags.rotated || pIn->flags.display; 3378 3379 if (displayResource) 3380 { 3381 addrPreferredSwSet.value = pIn->flags.rotated ? AddrSwSetR : AddrSwSetD; 3382 3383 if (pIn->bpp > 64) 3384 { 3385 blockSet.value = 0; 3386 } 3387 else if (m_settings.isDce12) 3388 { 3389 if (pIn->bpp != 32) 3390 { 3391 blockSet.micro = FALSE; 3392 } 3393 3394 // DCE12 does not support display surface to be _T swizzle mode 3395 prtXor = FALSE; 3396 3397 addrValidSwSet.value = AddrSwSetD | AddrSwSetR; 3398 } 3399 else if (m_settings.isDcn1) 3400 { 3401 // _R is not supported by Dcn1 3402 if (pIn->bpp == 64) 3403 { 3404 addrPreferredSwSet.value = AddrSwSetD; 3405 addrValidSwSet.value = AddrSwSetD; 3406 } 3407 else 3408 { 3409 addrPreferredSwSet.value = AddrSwSetS; 3410 addrValidSwSet.value = AddrSwSetS | AddrSwSetD; 3411 } 3412 3413 blockSet.micro = FALSE; 3414 } 3415 else 3416 { 3417 ADDR_NOT_IMPLEMENTED(); 3418 returnCode = ADDR_NOTSUPPORTED; 3419 } 3420 } 3421 } 3422 } 3423 3424 ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value); 3425 3426 pOut->clientPreferredSwSet = clientPreferredSwSet; 3427 3428 // Clamp client preferred set to valid set 3429 clientPreferredSwSet.value &= addrValidSwSet.value; 3430 3431 pOut->validSwTypeSet = addrValidSwSet; 3432 3433 if (clientPreferredSwSet.value == 0) 3434 { 3435 // Client asks for an invalid swizzle type... 
3436 ADDR_ASSERT_ALWAYS(); 3437 returnCode = ADDR_INVALIDPARAMS; 3438 } 3439 else 3440 { 3441 if (IsPow2(clientPreferredSwSet.value)) 3442 { 3443 // Only one swizzle type left, use it directly 3444 addrPreferredSwSet.value = clientPreferredSwSet.value; 3445 } 3446 else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0) 3447 { 3448 // Client wants 2 or more a valid swizzle type but none of them is addrlib preferred 3449 if (clientPreferredSwSet.sw_D) 3450 { 3451 addrPreferredSwSet.value = AddrSwSetD; 3452 } 3453 else if (clientPreferredSwSet.sw_Z) 3454 { 3455 addrPreferredSwSet.value = AddrSwSetZ; 3456 } 3457 else if (clientPreferredSwSet.sw_R) 3458 { 3459 addrPreferredSwSet.value = AddrSwSetR; 3460 } 3461 else 3462 { 3463 ADDR_ASSERT(clientPreferredSwSet.sw_S); 3464 addrPreferredSwSet.value = AddrSwSetS; 3465 } 3466 } 3467 3468 if ((numFrags > 1) && 3469 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags))) 3470 { 3471 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples 3472 blockSet.macro4KB = FALSE; 3473 } 3474 3475 if (pIn->flags.prt) 3476 { 3477 blockSet.value &= AddrBlockSetMacro64KB; 3478 } 3479 3480 // Apply customized forbidden setting 3481 blockSet.value &= ~pIn->forbiddenBlock.value; 3482 3483 if (pIn->maxAlign > 0) 3484 { 3485 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB)) 3486 { 3487 blockSet.macro64KB = FALSE; 3488 } 3489 3490 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB)) 3491 { 3492 blockSet.macro4KB = FALSE; 3493 } 3494 3495 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B)) 3496 { 3497 blockSet.micro = FALSE; 3498 } 3499 } 3500 3501 Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; 3502 Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; 3503 UINT_64 padSize[AddrBlockMaxTiledType] = {0}; 3504 3505 if (blockSet.micro) 3506 { 3507 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w, 3508 &blkAlign[AddrBlockMicro].h, 3509 &blkAlign[AddrBlockMicro].d, 3510 bpp, 3511 numFrags, 3512 
pOut->resourceType, 3513 ADDR_SW_256B); 3514 3515 if (returnCode == ADDR_OK) 3516 { 3517 if (displayResource) 3518 { 3519 blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32); 3520 } 3521 else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) && 3522 (minSizeAlign <= GetBlockSize(ADDR_SW_256B))) 3523 { 3524 // If one 256B block can contain the surface, don't bother bigger block type 3525 blockSet.macro4KB = FALSE; 3526 blockSet.macro64KB = FALSE; 3527 blockSet.var = FALSE; 3528 } 3529 3530 padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height, 3531 slice, &paddedDim[AddrBlockMicro]); 3532 } 3533 } 3534 3535 if ((returnCode == ADDR_OK) && blockSet.macro4KB) 3536 { 3537 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w, 3538 &blkAlign[AddrBlock4KB].h, 3539 &blkAlign[AddrBlock4KB].d, 3540 bpp, 3541 numFrags, 3542 pOut->resourceType, 3543 ADDR_SW_4KB); 3544 3545 if (returnCode == ADDR_OK) 3546 { 3547 if (displayResource) 3548 { 3549 blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32); 3550 } 3551 3552 padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height, 3553 slice, &paddedDim[AddrBlock4KB]); 3554 3555 ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]); 3556 } 3557 } 3558 3559 if ((returnCode == ADDR_OK) && blockSet.macro64KB) 3560 { 3561 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w, 3562 &blkAlign[AddrBlock64KB].h, 3563 &blkAlign[AddrBlock64KB].d, 3564 bpp, 3565 numFrags, 3566 pOut->resourceType, 3567 ADDR_SW_64KB); 3568 3569 if (returnCode == ADDR_OK) 3570 { 3571 if (displayResource) 3572 { 3573 blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32); 3574 } 3575 3576 padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height, 3577 slice, &paddedDim[AddrBlock64KB]); 3578 3579 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]); 3580 
ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]); 3581 } 3582 } 3583 3584 if (returnCode == ADDR_OK) 3585 { 3586 UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u); 3587 3588 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) 3589 { 3590 padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement); 3591 } 3592 3593 // Use minimum block type which meets all conditions above if flag minimizeAlign was set 3594 if (pIn->flags.minimizeAlign) 3595 { 3596 // If padded size of 64KB block is larger than padded size of 256B block or 4KB 3597 // block, filter out 64KB block from candidate list 3598 if (blockSet.macro64KB && 3599 ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) || 3600 (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB])))) 3601 { 3602 blockSet.macro64KB = FALSE; 3603 } 3604 3605 // If padded size of 4KB block is larger than padded size of 256B block, 3606 // filter out 4KB block from candidate list 3607 if (blockSet.macro4KB && 3608 blockSet.micro && 3609 (padSize[AddrBlockMicro] < padSize[AddrBlock4KB])) 3610 { 3611 blockSet.macro4KB = FALSE; 3612 } 3613 } 3614 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint 3615 else if (pIn->flags.opt4space) 3616 { 3617 UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] : 3618 (blockSet.macro4KB ? 
padSize[AddrBlock4KB] : padSize[AddrBlock64KB]); 3619 3620 threshold += threshold >> 1; 3621 3622 if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold)) 3623 { 3624 blockSet.macro64KB = FALSE; 3625 } 3626 3627 if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold)) 3628 { 3629 blockSet.macro4KB = FALSE; 3630 } 3631 } 3632 else 3633 { 3634 if (blockSet.macro64KB && 3635 (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) && 3636 ((blockSet.value & ~AddrBlockSetMacro64KB) != 0)) 3637 { 3638 // If 64KB block waste more than half memory on padding, filter it out from 3639 // candidate list when it is not the only choice left 3640 blockSet.macro64KB = FALSE; 3641 } 3642 } 3643 3644 if (blockSet.value == 0) 3645 { 3646 // Bad things happen, client will not get any useful information from AddrLib. 3647 // Maybe we should fill in some output earlier instead of outputing nothing? 3648 ADDR_ASSERT_ALWAYS(); 3649 returnCode = ADDR_INVALIDPARAMS; 3650 } 3651 else 3652 { 3653 pOut->validBlockSet = blockSet; 3654 pOut->canXor = pOut->canXor && 3655 (blockSet.macro4KB || blockSet.macro64KB || blockSet.var); 3656 3657 if (blockSet.macro64KB || blockSet.macro4KB) 3658 { 3659 if (addrPreferredSwSet.value == AddrSwSetZ) 3660 { 3661 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z; 3662 } 3663 else if (addrPreferredSwSet.value == AddrSwSetS) 3664 { 3665 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S; 3666 } 3667 else if (addrPreferredSwSet.value == AddrSwSetD) 3668 { 3669 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D; 3670 } 3671 else 3672 { 3673 ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR); 3674 pOut->swizzleMode = blockSet.macro64KB ? 
ADDR_SW_64KB_R : ADDR_SW_4KB_R; 3675 } 3676 3677 if (prtXor && blockSet.macro64KB) 3678 { 3679 // Client wants PRTXOR, give back _T swizzle mode if 64KB is available 3680 const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z; 3681 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap); 3682 } 3683 else if (pOut->canXor) 3684 { 3685 // Client wants XOR and this is allowed, return XOR version swizzle mode 3686 const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z; 3687 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap); 3688 } 3689 } 3690 else if (blockSet.micro) 3691 { 3692 if (addrPreferredSwSet.value == AddrSwSetS) 3693 { 3694 pOut->swizzleMode = ADDR_SW_256B_S; 3695 } 3696 else if (addrPreferredSwSet.value == AddrSwSetD) 3697 { 3698 pOut->swizzleMode = ADDR_SW_256B_D; 3699 } 3700 else 3701 { 3702 ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR); 3703 pOut->swizzleMode = ADDR_SW_256B_R; 3704 } 3705 } 3706 else if (blockSet.linear) 3707 { 3708 // Fall into this branch doesn't mean linear is suitable, only no other choices! 
3709 pOut->swizzleMode = ADDR_SW_LINEAR; 3710 } 3711 else 3712 { 3713 ADDR_ASSERT(blockSet.var); 3714 3715 // Designer consider VAR swizzle mode is usless for most cases 3716 ADDR_UNHANDLED_CASE(); 3717 3718 returnCode = ADDR_NOTSUPPORTED; 3719 } 3720 3721 #if DEBUG 3722 // Post sanity check, at least AddrLib should accept the output generated by its own 3723 if (pOut->swizzleMode != ADDR_SW_LINEAR) 3724 { 3725 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; 3726 localIn.flags = pIn->flags; 3727 localIn.swizzleMode = pOut->swizzleMode; 3728 localIn.resourceType = pOut->resourceType; 3729 localIn.format = pIn->format; 3730 localIn.bpp = bpp; 3731 localIn.width = width; 3732 localIn.height = height; 3733 localIn.numSlices = slice; 3734 localIn.numMipLevels = numMipLevels; 3735 localIn.numSamples = numSamples; 3736 localIn.numFrags = numFrags; 3737 3738 HwlComputeSurfaceInfoSanityCheck(&localIn); 3739 3740 } 3741 #endif 3742 } 3743 } 3744 } 3745 } 3746 3747 return returnCode; 3748 } 3749 3750 /** 3751 ************************************************************************************************************************ 3752 * Gfx9Lib::ComputeStereoInfo 3753 * 3754 * @brief 3755 * Compute height alignment and right eye pipeBankXor for stereo surface 3756 * 3757 * @return 3758 * Error code 3759 * 3760 ************************************************************************************************************************ 3761 */ 3762 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo( 3763 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, 3764 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut, 3765 UINT_32* pHeightAlign 3766 ) const 3767 { 3768 ADDR_E_RETURNCODE returnCode = ADDR_OK; 3769 3770 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut); 3771 3772 if (eqIndex < m_numEquations) 3773 { 3774 if (IsXor(pIn->swizzleMode)) 3775 { 3776 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); 3777 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2); 3778 const UINT_32 numBankBits = 
GetBankXorBits(blkSizeLog2); 3779 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3); 3780 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1; 3781 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex]; 3782 3783 ADDR_ASSERT(maxYCoordBlock256 == 3784 GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1)); 3785 3786 const UINT_32 maxYCoordInBaseEquation = 3787 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256; 3788 3789 ADDR_ASSERT(maxYCoordInBaseEquation == 3790 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1)); 3791 3792 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits; 3793 3794 ADDR_ASSERT(maxYCoordInPipeXor == 3795 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1)); 3796 3797 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ? 3798 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits; 3799 3800 ADDR_ASSERT(maxYCoordInBankXor == 3801 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1)); 3802 3803 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor); 3804 3805 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation) 3806 { 3807 *pHeightAlign = 1u << maxYCoordInPipeBankXor; 3808 3809 if (pOut->pStereoInfo != NULL) 3810 { 3811 pOut->pStereoInfo->rightSwizzle = 0; 3812 3813 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0) 3814 { 3815 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor) 3816 { 3817 pOut->pStereoInfo->rightSwizzle |= (1u << 1); 3818 } 3819 3820 if (maxYCoordInBankXor == maxYCoordInPipeBankXor) 3821 { 3822 pOut->pStereoInfo->rightSwizzle |= 3823 1u << ((numPipeBits % 2) ? 
numPipeBits : numPipeBits + 1); 3824 } 3825 3826 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle == 3827 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2], 3828 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor)); 3829 } 3830 } 3831 } 3832 } 3833 } 3834 else 3835 { 3836 ADDR_ASSERT_ALWAYS(); 3837 returnCode = ADDR_ERROR; 3838 } 3839 3840 return returnCode; 3841 } 3842 3843 /** 3844 ************************************************************************************************************************ 3845 * Gfx9Lib::HwlComputeSurfaceInfoTiled 3846 * 3847 * @brief 3848 * Internal function to calculate alignment for tiled surface 3849 * 3850 * @return 3851 * ADDR_E_RETURNCODE 3852 ************************************************************************************************************************ 3853 */ 3854 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( 3855 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 3856 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 3857 ) const 3858 { 3859 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth, 3860 &pOut->blockHeight, 3861 &pOut->blockSlices, 3862 pIn->bpp, 3863 pIn->numFrags, 3864 pIn->resourceType, 3865 pIn->swizzleMode); 3866 3867 if (returnCode == ADDR_OK) 3868 { 3869 UINT_32 pitchAlignInElement = pOut->blockWidth; 3870 3871 if ((IsTex2d(pIn->resourceType) == TRUE) && 3872 (pIn->flags.display || pIn->flags.rotated) && 3873 (pIn->numMipLevels <= 1) && 3874 (pIn->numSamples <= 1) && 3875 (pIn->numFrags <= 1)) 3876 { 3877 // Display engine needs pitch align to be at least 32 pixels. 
3878 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32); 3879 } 3880 3881 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement); 3882 3883 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0)) 3884 { 3885 if ((pIn->pitchInElement % pitchAlignInElement) != 0) 3886 { 3887 returnCode = ADDR_INVALIDPARAMS; 3888 } 3889 else if (pIn->pitchInElement < pOut->pitch) 3890 { 3891 returnCode = ADDR_INVALIDPARAMS; 3892 } 3893 else 3894 { 3895 pOut->pitch = pIn->pitchInElement; 3896 } 3897 } 3898 3899 UINT_32 heightAlign = 0; 3900 3901 if (pIn->flags.qbStereo) 3902 { 3903 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign); 3904 } 3905 3906 if (returnCode == ADDR_OK) 3907 { 3908 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight); 3909 3910 if (heightAlign > 1) 3911 { 3912 pOut->height = PowTwoAlign(pOut->height, heightAlign); 3913 } 3914 3915 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices); 3916 3917 pOut->epitchIsHeight = FALSE; 3918 pOut->mipChainInTail = FALSE; 3919 pOut->firstMipIdInTail = pIn->numMipLevels; 3920 3921 pOut->mipChainPitch = pOut->pitch; 3922 pOut->mipChainHeight = pOut->height; 3923 pOut->mipChainSlice = pOut->numSlices; 3924 3925 if (pIn->numMipLevels > 1) 3926 { 3927 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType, 3928 pIn->swizzleMode, 3929 pIn->bpp, 3930 pIn->width, 3931 pIn->height, 3932 pIn->numSlices, 3933 pOut->blockWidth, 3934 pOut->blockHeight, 3935 pOut->blockSlices, 3936 pIn->numMipLevels, 3937 pOut->pMipInfo); 3938 3939 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1); 3940 3941 if (endingMipId == 0) 3942 { 3943 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType, 3944 pIn->swizzleMode, 3945 pOut->blockWidth, 3946 pOut->blockHeight, 3947 pOut->blockSlices); 3948 3949 pOut->epitchIsHeight = TRUE; 3950 pOut->pitch = tailMaxDim.w; 3951 pOut->height = tailMaxDim.h; 3952 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ? 
3953 tailMaxDim.d : pIn->numSlices; 3954 pOut->mipChainInTail = TRUE; 3955 } 3956 else 3957 { 3958 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth; 3959 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight; 3960 3961 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType, 3962 pIn->swizzleMode, 3963 mip0WidthInBlk, 3964 mip0HeightInBlk, 3965 pOut->numSlices / pOut->blockSlices); 3966 if (majorMode == ADDR_MAJOR_Y) 3967 { 3968 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk); 3969 3970 if ((mip1WidthInBlk == 1) && (endingMipId > 2)) 3971 { 3972 mip1WidthInBlk++; 3973 } 3974 3975 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth); 3976 3977 pOut->epitchIsHeight = FALSE; 3978 } 3979 else 3980 { 3981 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk); 3982 3983 if ((mip1HeightInBlk == 1) && (endingMipId > 2)) 3984 { 3985 mip1HeightInBlk++; 3986 } 3987 3988 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight); 3989 3990 pOut->epitchIsHeight = TRUE; 3991 } 3992 } 3993 3994 if (pOut->pMipInfo != NULL) 3995 { 3996 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); 3997 3998 for (UINT_32 i = 0; i < pIn->numMipLevels; i++) 3999 { 4000 Dim3d mipStartPos = {0}; 4001 UINT_32 mipTailOffsetInBytes = 0; 4002 4003 mipStartPos = GetMipStartPos(pIn->resourceType, 4004 pIn->swizzleMode, 4005 pOut->pitch, 4006 pOut->height, 4007 pOut->numSlices, 4008 pOut->blockWidth, 4009 pOut->blockHeight, 4010 pOut->blockSlices, 4011 i, 4012 elementBytesLog2, 4013 &mipTailOffsetInBytes); 4014 4015 UINT_32 pitchInBlock = 4016 pOut->mipChainPitch / pOut->blockWidth; 4017 UINT_32 sliceInBlock = 4018 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock; 4019 UINT_64 blockIndex = 4020 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w; 4021 UINT_64 macroBlockOffset = 4022 blockIndex << GetBlockSizeLog2(pIn->swizzleMode); 4023 4024 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset; 4025 pOut->pMipInfo[i].mipTailOffset = 
mipTailOffsetInBytes; 4026 } 4027 } 4028 } 4029 else if (pOut->pMipInfo != NULL) 4030 { 4031 pOut->pMipInfo[0].pitch = pOut->pitch; 4032 pOut->pMipInfo[0].height = pOut->height; 4033 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1; 4034 pOut->pMipInfo[0].offset = 0; 4035 } 4036 4037 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight * 4038 (pIn->bpp >> 3) * pIn->numFrags; 4039 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; 4040 pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode); 4041 4042 if (pIn->flags.prt) 4043 { 4044 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment); 4045 } 4046 } 4047 } 4048 4049 return returnCode; 4050 } 4051 4052 /** 4053 ************************************************************************************************************************ 4054 * Gfx9Lib::HwlComputeSurfaceInfoLinear 4055 * 4056 * @brief 4057 * Internal function to calculate alignment for linear surface 4058 * 4059 * @return 4060 * ADDR_E_RETURNCODE 4061 ************************************************************************************************************************ 4062 */ 4063 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear( 4064 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 4065 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 4066 ) const 4067 { 4068 ADDR_E_RETURNCODE returnCode = ADDR_OK; 4069 UINT_32 pitch = 0; 4070 UINT_32 actualHeight = 0; 4071 UINT_32 elementBytes = pIn->bpp >> 3; 4072 const UINT_32 alignment = pIn->flags.prt ? 
PrtAlignment : 256; 4073 4074 if (IsTex1d(pIn->resourceType)) 4075 { 4076 if (pIn->height > 1) 4077 { 4078 returnCode = ADDR_INVALIDPARAMS; 4079 } 4080 else 4081 { 4082 const UINT_32 pitchAlignInElement = alignment / elementBytes; 4083 4084 pitch = PowTwoAlign(pIn->width, pitchAlignInElement); 4085 actualHeight = pIn->numMipLevels; 4086 4087 if (pIn->flags.prt == FALSE) 4088 { 4089 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, 4090 &pitch, &actualHeight); 4091 } 4092 4093 if (returnCode == ADDR_OK) 4094 { 4095 if (pOut->pMipInfo != NULL) 4096 { 4097 for (UINT_32 i = 0; i < pIn->numMipLevels; i++) 4098 { 4099 pOut->pMipInfo[i].offset = pitch * elementBytes * i; 4100 pOut->pMipInfo[i].pitch = pitch; 4101 pOut->pMipInfo[i].height = 1; 4102 pOut->pMipInfo[i].depth = 1; 4103 } 4104 } 4105 } 4106 } 4107 } 4108 else 4109 { 4110 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo); 4111 } 4112 4113 if ((pitch == 0) || (actualHeight == 0)) 4114 { 4115 returnCode = ADDR_INVALIDPARAMS; 4116 } 4117 4118 if (returnCode == ADDR_OK) 4119 { 4120 pOut->pitch = pitch; 4121 pOut->height = pIn->height; 4122 pOut->numSlices = pIn->numSlices; 4123 pOut->mipChainPitch = pitch; 4124 pOut->mipChainHeight = actualHeight; 4125 pOut->mipChainSlice = pOut->numSlices; 4126 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE; 4127 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes; 4128 pOut->surfSize = pOut->sliceSize * pOut->numSlices; 4129 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment; 4130 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 
1 : (256 / elementBytes); 4131 pOut->blockHeight = 1; 4132 pOut->blockSlices = 1; 4133 } 4134 4135 // Post calculation validate 4136 ADDR_ASSERT(pOut->sliceSize > 0); 4137 4138 return returnCode; 4139 } 4140 4141 /** 4142 ************************************************************************************************************************ 4143 * Gfx9Lib::GetMipChainInfo 4144 * 4145 * @brief 4146 * Internal function to get out information about mip chain 4147 * 4148 * @return 4149 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created 4150 ************************************************************************************************************************ 4151 */ 4152 UINT_32 Gfx9Lib::GetMipChainInfo( 4153 AddrResourceType resourceType, 4154 AddrSwizzleMode swizzleMode, 4155 UINT_32 bpp, 4156 UINT_32 mip0Width, 4157 UINT_32 mip0Height, 4158 UINT_32 mip0Depth, 4159 UINT_32 blockWidth, 4160 UINT_32 blockHeight, 4161 UINT_32 blockDepth, 4162 UINT_32 numMipLevel, 4163 ADDR2_MIP_INFO* pMipInfo) const 4164 { 4165 const Dim3d tailMaxDim = 4166 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); 4167 4168 UINT_32 mipPitch = mip0Width; 4169 UINT_32 mipHeight = mip0Height; 4170 UINT_32 mipDepth = IsTex3d(resourceType) ? 
mip0Depth : 1; 4171 UINT_32 offset = 0; 4172 UINT_32 firstMipIdInTail = numMipLevel; 4173 BOOL_32 inTail = FALSE; 4174 BOOL_32 finalDim = FALSE; 4175 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode); 4176 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE); 4177 4178 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++) 4179 { 4180 if (inTail) 4181 { 4182 if (finalDim == FALSE) 4183 { 4184 UINT_32 mipSize; 4185 4186 if (is3dThick) 4187 { 4188 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3); 4189 } 4190 else 4191 { 4192 mipSize = mipPitch * mipHeight * (bpp >> 3); 4193 } 4194 4195 if (mipSize <= 256) 4196 { 4197 UINT_32 index = Log2(bpp >> 3); 4198 4199 if (is3dThick) 4200 { 4201 mipPitch = Block256_3dZ[index].w; 4202 mipHeight = Block256_3dZ[index].h; 4203 mipDepth = Block256_3dZ[index].d; 4204 } 4205 else 4206 { 4207 mipPitch = Block256_2d[index].w; 4208 mipHeight = Block256_2d[index].h; 4209 } 4210 4211 finalDim = TRUE; 4212 } 4213 } 4214 } 4215 else 4216 { 4217 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, 4218 mipPitch, mipHeight, mipDepth); 4219 4220 if (inTail) 4221 { 4222 firstMipIdInTail = mipId; 4223 mipPitch = tailMaxDim.w; 4224 mipHeight = tailMaxDim.h; 4225 4226 if (is3dThick) 4227 { 4228 mipDepth = tailMaxDim.d; 4229 } 4230 } 4231 else 4232 { 4233 mipPitch = PowTwoAlign(mipPitch, blockWidth); 4234 mipHeight = PowTwoAlign(mipHeight, blockHeight); 4235 4236 if (is3dThick) 4237 { 4238 mipDepth = PowTwoAlign(mipDepth, blockDepth); 4239 } 4240 } 4241 } 4242 4243 if (pMipInfo != NULL) 4244 { 4245 pMipInfo[mipId].pitch = mipPitch; 4246 pMipInfo[mipId].height = mipHeight; 4247 pMipInfo[mipId].depth = mipDepth; 4248 pMipInfo[mipId].offset = offset; 4249 } 4250 4251 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3)); 4252 4253 if (finalDim) 4254 { 4255 if (is3dThin) 4256 { 4257 mipDepth = Max(mipDepth >> 1, 1u); 4258 } 4259 } 4260 else 4261 { 4262 mipPitch = Max(mipPitch >> 1, 1u); 4263 mipHeight = Max(mipHeight >> 
1, 1u); 4264 4265 if (is3dThick || is3dThin) 4266 { 4267 mipDepth = Max(mipDepth >> 1, 1u); 4268 } 4269 } 4270 } 4271 4272 return firstMipIdInTail; 4273 } 4274 4275 /** 4276 ************************************************************************************************************************ 4277 * Gfx9Lib::GetMetaMiptailInfo 4278 * 4279 * @brief 4280 * Get mip tail coordinate information. 4281 * 4282 * @return 4283 * N/A 4284 ************************************************************************************************************************ 4285 */ 4286 VOID Gfx9Lib::GetMetaMiptailInfo( 4287 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord 4288 Dim3d mipCoord, ///< [in] mip tail base coord 4289 UINT_32 numMipInTail, ///< [in] number of mips in tail 4290 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth 4291 ) const 4292 { 4293 BOOL_32 isThick = (pMetaBlkDim->d > 1); 4294 UINT_32 mipWidth = pMetaBlkDim->w; 4295 UINT_32 mipHeight = pMetaBlkDim->h >> 1; 4296 UINT_32 mipDepth = pMetaBlkDim->d; 4297 UINT_32 minInc; 4298 4299 if (isThick) 4300 { 4301 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 
64 : 32); 4302 } 4303 else if (pMetaBlkDim->h >= 1024) 4304 { 4305 minInc = 256; 4306 } 4307 else if (pMetaBlkDim->h == 512) 4308 { 4309 minInc = 128; 4310 } 4311 else 4312 { 4313 minInc = 64; 4314 } 4315 4316 UINT_32 blk32MipId = 0xFFFFFFFF; 4317 4318 for (UINT_32 mip = 0; mip < numMipInTail; mip++) 4319 { 4320 pInfo[mip].inMiptail = TRUE; 4321 pInfo[mip].startX = mipCoord.w; 4322 pInfo[mip].startY = mipCoord.h; 4323 pInfo[mip].startZ = mipCoord.d; 4324 pInfo[mip].width = mipWidth; 4325 pInfo[mip].height = mipHeight; 4326 pInfo[mip].depth = mipDepth; 4327 4328 if (mipWidth <= 32) 4329 { 4330 if (blk32MipId == 0xFFFFFFFF) 4331 { 4332 blk32MipId = mip; 4333 } 4334 4335 mipCoord.w = pInfo[blk32MipId].startX; 4336 mipCoord.h = pInfo[blk32MipId].startY; 4337 mipCoord.d = pInfo[blk32MipId].startZ; 4338 4339 switch (mip - blk32MipId) 4340 { 4341 case 0: 4342 mipCoord.w += 32; // 16x16 4343 break; 4344 case 1: 4345 mipCoord.h += 32; // 8x8 4346 break; 4347 case 2: 4348 mipCoord.h += 32; // 4x4 4349 mipCoord.w += 16; 4350 break; 4351 case 3: 4352 mipCoord.h += 32; // 2x2 4353 mipCoord.w += 32; 4354 break; 4355 case 4: 4356 mipCoord.h += 32; // 1x1 4357 mipCoord.w += 48; 4358 break; 4359 // The following are for BC/ASTC formats 4360 case 5: 4361 mipCoord.h += 48; // 1/2 x 1/2 4362 break; 4363 case 6: 4364 mipCoord.h += 48; // 1/4 x 1/4 4365 mipCoord.w += 16; 4366 break; 4367 case 7: 4368 mipCoord.h += 48; // 1/8 x 1/8 4369 mipCoord.w += 32; 4370 break; 4371 case 8: 4372 mipCoord.h += 48; // 1/16 x 1/16 4373 mipCoord.w += 48; 4374 break; 4375 default: 4376 ADDR_ASSERT_ALWAYS(); 4377 break; 4378 } 4379 4380 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8; 4381 mipHeight = mipWidth; 4382 4383 if (isThick) 4384 { 4385 mipDepth = mipWidth; 4386 } 4387 } 4388 else 4389 { 4390 if (mipWidth <= minInc) 4391 { 4392 // if we're below the minimal increment... 
4393 if (isThick) 4394 { 4395 // For 3d, just go in z direction 4396 mipCoord.d += mipDepth; 4397 } 4398 else 4399 { 4400 // For 2d, first go across, then down 4401 if ((mipWidth * 2) == minInc) 4402 { 4403 // if we're 2 mips below, that's when we go back in x, and down in y 4404 mipCoord.w -= minInc; 4405 mipCoord.h += minInc; 4406 } 4407 else 4408 { 4409 // otherwise, just go across in x 4410 mipCoord.w += minInc; 4411 } 4412 } 4413 } 4414 else 4415 { 4416 // On even mip, go down, otherwise, go across 4417 if (mip & 1) 4418 { 4419 mipCoord.w += mipWidth; 4420 } 4421 else 4422 { 4423 mipCoord.h += mipHeight; 4424 } 4425 } 4426 // Divide the width by 2 4427 mipWidth >>= 1; 4428 // After the first mip in tail, the mip is always a square 4429 mipHeight = mipWidth; 4430 // ...or for 3d, a cube 4431 if (isThick) 4432 { 4433 mipDepth = mipWidth; 4434 } 4435 } 4436 } 4437 } 4438 4439 /** 4440 ************************************************************************************************************************ 4441 * Gfx9Lib::GetMipStartPos 4442 * 4443 * @brief 4444 * Internal function to get out information about mip logical start position 4445 * 4446 * @return 4447 * logical start position in macro block width/heith/depth of one mip level within one slice 4448 ************************************************************************************************************************ 4449 */ 4450 Dim3d Gfx9Lib::GetMipStartPos( 4451 AddrResourceType resourceType, 4452 AddrSwizzleMode swizzleMode, 4453 UINT_32 width, 4454 UINT_32 height, 4455 UINT_32 depth, 4456 UINT_32 blockWidth, 4457 UINT_32 blockHeight, 4458 UINT_32 blockDepth, 4459 UINT_32 mipId, 4460 UINT_32 log2ElementBytes, 4461 UINT_32* pMipTailBytesOffset) const 4462 { 4463 Dim3d mipStartPos = {0}; 4464 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); 4465 4466 // Report mip in tail if Mip0 is already in mip tail 4467 BOOL_32 inMipTail = 
IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth); 4468 UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); 4469 UINT_32 mipIndexInTail = mipId; 4470 4471 if (inMipTail == FALSE) 4472 { 4473 // Mip 0 dimension, unit in block 4474 UINT_32 mipWidthInBlk = width / blockWidth; 4475 UINT_32 mipHeightInBlk = height / blockHeight; 4476 UINT_32 mipDepthInBlk = depth / blockDepth; 4477 AddrMajorMode majorMode = GetMajorMode(resourceType, 4478 swizzleMode, 4479 mipWidthInBlk, 4480 mipHeightInBlk, 4481 mipDepthInBlk); 4482 4483 UINT_32 endingMip = mipId + 1; 4484 4485 for (UINT_32 i = 1; i <= mipId; i++) 4486 { 4487 if ((i == 1) || (i == 3)) 4488 { 4489 if (majorMode == ADDR_MAJOR_Y) 4490 { 4491 mipStartPos.w += mipWidthInBlk; 4492 } 4493 else 4494 { 4495 mipStartPos.h += mipHeightInBlk; 4496 } 4497 } 4498 else 4499 { 4500 if (majorMode == ADDR_MAJOR_X) 4501 { 4502 mipStartPos.w += mipWidthInBlk; 4503 } 4504 else if (majorMode == ADDR_MAJOR_Y) 4505 { 4506 mipStartPos.h += mipHeightInBlk; 4507 } 4508 else 4509 { 4510 mipStartPos.d += mipDepthInBlk; 4511 } 4512 } 4513 4514 BOOL_32 inTail = FALSE; 4515 4516 if (IsThick(resourceType, swizzleMode)) 4517 { 4518 UINT_32 dim = log2blkSize % 3; 4519 4520 if (dim == 0) 4521 { 4522 inTail = 4523 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2); 4524 } 4525 else if (dim == 1) 4526 { 4527 inTail = 4528 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2); 4529 } 4530 else 4531 { 4532 inTail = 4533 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1); 4534 } 4535 } 4536 else 4537 { 4538 if (log2blkSize & 1) 4539 { 4540 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1); 4541 } 4542 else 4543 { 4544 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2); 4545 } 4546 } 4547 4548 if (inTail) 4549 { 4550 endingMip = i; 4551 break; 4552 } 4553 4554 mipWidthInBlk = RoundHalf(mipWidthInBlk); 4555 mipHeightInBlk = RoundHalf(mipHeightInBlk); 4556 mipDepthInBlk = 
RoundHalf(mipDepthInBlk); 4557 } 4558 4559 if (mipId >= endingMip) 4560 { 4561 inMipTail = TRUE; 4562 mipIndexInTail = mipId - endingMip; 4563 } 4564 } 4565 4566 if (inMipTail) 4567 { 4568 UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize; 4569 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32)); 4570 *pMipTailBytesOffset = MipTailOffset256B[index] << 8; 4571 } 4572 4573 return mipStartPos; 4574 } 4575 4576 /** 4577 ************************************************************************************************************************ 4578 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled 4579 * 4580 * @brief 4581 * Internal function to calculate address from coord for tiled swizzle surface 4582 * 4583 * @return 4584 * ADDR_E_RETURNCODE 4585 ************************************************************************************************************************ 4586 */ 4587 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( 4588 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 4589 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure 4590 ) const 4591 { 4592 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; 4593 localIn.swizzleMode = pIn->swizzleMode; 4594 localIn.flags = pIn->flags; 4595 localIn.resourceType = pIn->resourceType; 4596 localIn.bpp = pIn->bpp; 4597 localIn.width = Max(pIn->unalignedWidth, 1u); 4598 localIn.height = Max(pIn->unalignedHeight, 1u); 4599 localIn.numSlices = Max(pIn->numSlices, 1u); 4600 localIn.numMipLevels = Max(pIn->numMipLevels, 1u); 4601 localIn.numSamples = Max(pIn->numSamples, 1u); 4602 localIn.numFrags = Max(pIn->numFrags, 1u); 4603 if (localIn.numMipLevels <= 1) 4604 { 4605 localIn.pitchInElement = pIn->pitchInElement; 4606 } 4607 4608 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; 4609 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut); 4610 4611 BOOL_32 valid = (returnCode == ADDR_OK) && 4612 
(IsThin(pIn->resourceType, pIn->swizzleMode) || 4613 IsThick(pIn->resourceType, pIn->swizzleMode)) && 4614 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode))); 4615 4616 if (valid) 4617 { 4618 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); 4619 Dim3d mipStartPos = {0}; 4620 UINT_32 mipTailBytesOffset = 0; 4621 4622 if (pIn->numMipLevels > 1) 4623 { 4624 // Mip-map chain cannot be MSAA surface 4625 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1)); 4626 4627 mipStartPos = GetMipStartPos(pIn->resourceType, 4628 pIn->swizzleMode, 4629 localOut.pitch, 4630 localOut.height, 4631 localOut.numSlices, 4632 localOut.blockWidth, 4633 localOut.blockHeight, 4634 localOut.blockSlices, 4635 pIn->mipId, 4636 log2ElementBytes, 4637 &mipTailBytesOffset); 4638 } 4639 4640 UINT_32 interleaveOffset = 0; 4641 UINT_32 pipeBits = 0; 4642 UINT_32 pipeXor = 0; 4643 UINT_32 bankBits = 0; 4644 UINT_32 bankXor = 0; 4645 4646 if (IsThin(pIn->resourceType, pIn->swizzleMode)) 4647 { 4648 UINT_32 blockOffset = 0; 4649 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); 4650 4651 if (IsZOrderSwizzle(pIn->swizzleMode)) 4652 { 4653 // Morton generation 4654 if ((log2ElementBytes == 0) || (log2ElementBytes == 2)) 4655 { 4656 UINT_32 totalLowBits = 6 - log2ElementBytes; 4657 UINT_32 mortBits = totalLowBits / 2; 4658 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits); 4659 // Are 9 bits enough? 
4660 UINT_32 highBitsValue = 4661 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits; 4662 blockOffset = lowBitsValue | highBitsValue; 4663 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue); 4664 } 4665 else 4666 { 4667 blockOffset = MortonGen2d(pIn->y, pIn->x, 13); 4668 } 4669 4670 // Fill LSBs with sample bits 4671 if (pIn->numSamples > 1) 4672 { 4673 blockOffset *= pIn->numSamples; 4674 blockOffset |= pIn->sample; 4675 } 4676 4677 // Shift according to BytesPP 4678 blockOffset <<= log2ElementBytes; 4679 } 4680 else 4681 { 4682 // Micro block offset 4683 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn); 4684 blockOffset = microBlockOffset; 4685 4686 // Micro block dimension 4687 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp); 4688 Dim2d microBlockDim = Block256_2d[log2ElementBytes]; 4689 // Morton generation, does 12 bit enough? 4690 blockOffset |= 4691 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8; 4692 4693 // Sample bits start location 4694 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples); 4695 // Join sample bits information to the highest Macro block bits 4696 if (IsNonPrtXor(pIn->swizzleMode)) 4697 { 4698 // Non-prt-Xor : xor highest Macro block bits with sample bits 4699 blockOffset = blockOffset ^ (pIn->sample << sampleStart); 4700 } 4701 else 4702 { 4703 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits 4704 // after this op, the blockOffset only contains log2 Macro block size bits 4705 blockOffset %= (1 << sampleStart); 4706 blockOffset |= (pIn->sample << sampleStart); 4707 ADDR_ASSERT((blockOffset >> log2blkSize) == 0); 4708 } 4709 } 4710 4711 if (IsXor(pIn->swizzleMode)) 4712 { 4713 // Mask off bits above Macro block bits to keep page synonyms working for prt 4714 if (IsPrt(pIn->swizzleMode)) 4715 { 4716 blockOffset &= ((1 << log2blkSize) - 1); 4717 } 4718 4719 // Preserve offset inside pipe interleave 4720 interleaveOffset = blockOffset & ((1 << 
m_pipeInterleaveLog2) - 1); 4721 blockOffset >>= m_pipeInterleaveLog2; 4722 4723 // Pipe/Se xor bits 4724 pipeBits = GetPipeXorBits(log2blkSize); 4725 // Pipe xor 4726 pipeXor = FoldXor2d(blockOffset, pipeBits); 4727 blockOffset >>= pipeBits; 4728 4729 // Bank xor bits 4730 bankBits = GetBankXorBits(log2blkSize); 4731 // Bank Xor 4732 bankXor = FoldXor2d(blockOffset, bankBits); 4733 blockOffset >>= bankBits; 4734 4735 // Put all the part back together 4736 blockOffset <<= bankBits; 4737 blockOffset |= bankXor; 4738 blockOffset <<= pipeBits; 4739 blockOffset |= pipeXor; 4740 blockOffset <<= m_pipeInterleaveLog2; 4741 blockOffset |= interleaveOffset; 4742 } 4743 4744 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); 4745 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); 4746 4747 blockOffset |= mipTailBytesOffset; 4748 4749 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1)) 4750 { 4751 // Apply slice xor if not MSAA/PRT 4752 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2); 4753 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) << 4754 (m_pipeInterleaveLog2 + pipeBits)); 4755 } 4756 4757 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, 4758 bankBits, pipeBits, &blockOffset); 4759 4760 blockOffset %= (1 << log2blkSize); 4761 4762 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth; 4763 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight; 4764 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock; 4765 UINT_32 macroBlockIndex = 4766 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock + 4767 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock + 4768 ((pIn->x / localOut.blockWidth) + mipStartPos.w); 4769 4770 UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) << 4771 GetBlockSizeLog2(pIn->swizzleMode)); 4772 4773 
pOut->addr = blockOffset | macroBlockOffset; 4774 } 4775 else 4776 { 4777 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); 4778 4779 Dim3d microBlockDim = Block1K_3d[log2ElementBytes]; 4780 4781 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w), 4782 (pIn->y / microBlockDim.h), 4783 (pIn->slice / microBlockDim.d), 4784 8); 4785 4786 blockOffset <<= 10; 4787 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn); 4788 4789 if (IsXor(pIn->swizzleMode)) 4790 { 4791 // Mask off bits above Macro block bits to keep page synonyms working for prt 4792 if (IsPrt(pIn->swizzleMode)) 4793 { 4794 blockOffset &= ((1 << log2blkSize) - 1); 4795 } 4796 4797 // Preserve offset inside pipe interleave 4798 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1); 4799 blockOffset >>= m_pipeInterleaveLog2; 4800 4801 // Pipe/Se xor bits 4802 pipeBits = GetPipeXorBits(log2blkSize); 4803 // Pipe xor 4804 pipeXor = FoldXor3d(blockOffset, pipeBits); 4805 blockOffset >>= pipeBits; 4806 4807 // Bank xor bits 4808 bankBits = GetBankXorBits(log2blkSize); 4809 // Bank Xor 4810 bankXor = FoldXor3d(blockOffset, bankBits); 4811 blockOffset >>= bankBits; 4812 4813 // Put all the part back together 4814 blockOffset <<= bankBits; 4815 blockOffset |= bankXor; 4816 blockOffset <<= pipeBits; 4817 blockOffset |= pipeXor; 4818 blockOffset <<= m_pipeInterleaveLog2; 4819 blockOffset |= interleaveOffset; 4820 } 4821 4822 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); 4823 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); 4824 blockOffset |= mipTailBytesOffset; 4825 4826 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, 4827 bankBits, pipeBits, &blockOffset); 4828 4829 blockOffset %= (1 << log2blkSize); 4830 4831 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w; 4832 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h; 4833 UINT_32 zb = pIn->slice / localOut.blockSlices + + 
mipStartPos.d; 4834 4835 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth; 4836 UINT_32 sliceSizeInBlock = 4837 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock; 4838 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; 4839 4840 pOut->addr = blockOffset | (blockIndex << log2blkSize); 4841 } 4842 } 4843 else 4844 { 4845 returnCode = ADDR_INVALIDPARAMS; 4846 } 4847 4848 return returnCode; 4849 } 4850 4851 /** 4852 ************************************************************************************************************************ 4853 * Gfx9Lib::ComputeSurfaceInfoLinear 4854 * 4855 * @brief 4856 * Internal function to calculate padding for linear swizzle 2D/3D surface 4857 * 4858 * @return 4859 * N/A 4860 ************************************************************************************************************************ 4861 */ 4862 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding( 4863 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture 4864 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element 4865 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW 4866 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information 4867 ) const 4868 { 4869 ADDR_E_RETURNCODE returnCode = ADDR_OK; 4870 4871 UINT_32 elementBytes = pIn->bpp >> 3; 4872 UINT_32 pitchAlignInElement = 0; 4873 4874 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) 4875 { 4876 ADDR_ASSERT(pIn->numMipLevels <= 1); 4877 ADDR_ASSERT(pIn->numSlices <= 1); 4878 pitchAlignInElement = 1; 4879 } 4880 else 4881 { 4882 pitchAlignInElement = (256 / elementBytes); 4883 } 4884 4885 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement); 4886 UINT_32 slice0PaddedHeight = pIn->height; 4887 4888 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, 4889 &mipChainWidth, &slice0PaddedHeight); 4890 4891 if (returnCode == ADDR_OK) 4892 { 4893 UINT_32 mipChainHeight = 0; 4894 UINT_32 mipHeight = 
pIn->height; 4895 4896 for (UINT_32 i = 0; i < pIn->numMipLevels; i++) 4897 { 4898 if (pMipInfo != NULL) 4899 { 4900 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes; 4901 pMipInfo[i].pitch = mipChainWidth; 4902 pMipInfo[i].height = mipHeight; 4903 pMipInfo[i].depth = 1; 4904 } 4905 4906 mipChainHeight += mipHeight; 4907 mipHeight = RoundHalf(mipHeight); 4908 mipHeight = Max(mipHeight, 1u); 4909 } 4910 4911 *pMipmap0PaddedWidth = mipChainWidth; 4912 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight; 4913 } 4914 4915 return returnCode; 4916 } 4917 4918 } // V2 4919 } // Addr 4920