Home | History | Annotate | Download | only in memory
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file TilingFunctions.h
     24 *
     25 * @brief Tiling functions.
     26 *
     27 ******************************************************************************/
     28 #pragma once
     29 
#include "core/state.h"
#include "core/format_traits.h"
#include "memory/tilingtraits.h"

#include <algorithm>
#include <cstring>
     35 
     36 #define MAX_NUM_LOD 15
     37 
     38 #define GFX_ALIGN(x, a) (((x) + ((a) - 1)) - (((x) + ((a) - 1)) & ((a) - 1))) // Alt implementation with bitwise not (~) has issue with uint32 align used with 64-bit value, since ~'ed value will remain 32-bit.
     39 
     40 //////////////////////////////////////////////////////////////////////////
     41 /// SimdTile SSE(2x2), AVX(4x2), or AVX-512(4x4?)
     42 //////////////////////////////////////////////////////////////////////////
     43 template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
     44 struct SimdTile
     45 {
     46     // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
     47     float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD_WIDTH];
     48 
     49     //////////////////////////////////////////////////////////////////////////
     50     /// @brief Retrieve color from simd.
     51     /// @param index - linear index to color within simd.
     52     /// @param outputColor - output color
     53     INLINE void GetSwizzledColor(
     54         uint32_t index,
     55         float outputColor[4])
     56     {
     57         // SOA pattern for 2x2 is a subset of 4x2.
     58         //   0 1 4 5
     59         //   2 3 6 7
     60         // The offset converts pattern to linear
     61 #if (SIMD_TILE_X_DIM == 4)
     62         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
     63 #elif (SIMD_TILE_X_DIM == 2)
     64         static const uint32_t offset[] = { 0, 1, 2, 3 };
     65 #endif
     66 
     67         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
     68         {
     69             outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
     70         }
     71     }
     72 
     73     //////////////////////////////////////////////////////////////////////////
     74     /// @brief Retrieve color from simd.
     75     /// @param index - linear index to color within simd.
     76     /// @param outputColor - output color
     77     INLINE void SetSwizzledColor(
     78         uint32_t index,
     79         const float src[4])
     80     {
     81         // SOA pattern for 2x2 is a subset of 4x2.
     82         //   0 1 4 5
     83         //   2 3 6 7
     84         // The offset converts pattern to linear
     85 #if (SIMD_TILE_X_DIM == 4)
     86         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
     87 #elif (SIMD_TILE_X_DIM == 2)
     88         static const uint32_t offset[] = { 0, 1, 2, 3 };
     89 #endif
     90 
     91         // Only loop over the components needed for destination.
     92         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
     93         {
     94             this->color[i][offset[index]] = src[i];
     95         }
     96     }
     97 };
     98 
     99 template<>
    100 struct SimdTile <R8_UINT,R8_UINT>
    101 {
    102     // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
    103     uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD_WIDTH];
    104 
    105     //////////////////////////////////////////////////////////////////////////
    106     /// @brief Retrieve color from simd.
    107     /// @param index - linear index to color within simd.
    108     /// @param outputColor - output color
    109     INLINE void GetSwizzledColor(
    110         uint32_t index,
    111         float outputColor[4])
    112     {
    113         // SOA pattern for 2x2 is a subset of 4x2.
    114         //   0 1 4 5
    115         //   2 3 6 7
    116         // The offset converts pattern to linear
    117 #if (SIMD_TILE_X_DIM == 4)
    118         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
    119 #elif (SIMD_TILE_X_DIM == 2)
    120         static const uint32_t offset[] = { 0, 1, 2, 3 };
    121 #endif
    122 
    123         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
    124         {
    125             uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
    126             outputColor[i] = *(float*)&src;
    127         }
    128     }
    129 
    130     //////////////////////////////////////////////////////////////////////////
    131     /// @brief Retrieve color from simd.
    132     /// @param index - linear index to color within simd.
    133     /// @param outputColor - output color
    134     INLINE void SetSwizzledColor(
    135         uint32_t index,
    136         const float src[4])
    137     {
    138         // SOA pattern for 2x2 is a subset of 4x2.
    139         //   0 1 4 5
    140         //   2 3 6 7
    141         // The offset converts pattern to linear
    142 #if (SIMD_TILE_X_DIM == 4)
    143         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
    144 #elif (SIMD_TILE_X_DIM == 2)
    145         static const uint32_t offset[] = { 0, 1, 2, 3 };
    146 #endif
    147 
    148         // Only loop over the components needed for destination.
    149         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
    150         {
    151             this->color[i][offset[index]] = *(uint8_t*)&src[i];
    152         }
    153     }
    154 };
    155 
    156 #if ENABLE_AVX512_SIMD16
    157 //////////////////////////////////////////////////////////////////////////
    158 /// SimdTile 8x2 for AVX-512
    159 //////////////////////////////////////////////////////////////////////////
    160 
    161 template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
    162 struct SimdTile_16
    163 {
    164     // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
    165     float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD16_WIDTH];
    166 
    167     //////////////////////////////////////////////////////////////////////////
    168     /// @brief Retrieve color from simd.
    169     /// @param index - linear index to color within simd.
    170     /// @param outputColor - output color
    171     INLINE void GetSwizzledColor(
    172         uint32_t index,
    173         float outputColor[4])
    174     {
    175         // SOA pattern for 8x2..
    176         //   0 1 4 5 8 9 C D
    177         //   2 3 6 7 A B E F
    178         // The offset converts pattern to linear
    179         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
    180 
    181         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
    182         {
    183             outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
    184         }
    185     }
    186 
    187     //////////////////////////////////////////////////////////////////////////
    188     /// @brief Retrieve color from simd.
    189     /// @param index - linear index to color within simd.
    190     /// @param outputColor - output color
    191     INLINE void SetSwizzledColor(
    192         uint32_t index,
    193         const float src[4])
    194     {
    195         // SOA pattern for 8x2..
    196         //   0 1 4 5 8 9 C D
    197         //   2 3 6 7 A B E F
    198         // The offset converts pattern to linear
    199         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
    200 
    201         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
    202         {
    203             this->color[i][offset[index]] = src[i];
    204         }
    205     }
    206 };
    207 
    208 template<>
    209 struct SimdTile_16 <R8_UINT, R8_UINT>
    210 {
    211     // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
    212     uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD16_WIDTH];
    213 
    214     //////////////////////////////////////////////////////////////////////////
    215     /// @brief Retrieve color from simd.
    216     /// @param index - linear index to color within simd.
    217     /// @param outputColor - output color
    218     INLINE void GetSwizzledColor(
    219         uint32_t index,
    220         float outputColor[4])
    221     {
    222         // SOA pattern for 8x2..
    223         //   0 1 4 5 8 9 C D
    224         //   2 3 6 7 A B E F
    225         // The offset converts pattern to linear
    226         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
    227 
    228         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
    229         {
    230             uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
    231             outputColor[i] = *(float*)&src;
    232         }
    233     }
    234 
    235     //////////////////////////////////////////////////////////////////////////
    236     /// @brief Retrieve color from simd.
    237     /// @param index - linear index to color within simd.
    238     /// @param outputColor - output color
    239     INLINE void SetSwizzledColor(
    240         uint32_t index,
    241         const float src[4])
    242     {
    243         // SOA pattern for 8x2..
    244         //   0 1 4 5 8 9 C D
    245         //   2 3 6 7 A B E F
    246         // The offset converts pattern to linear
    247         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
    248 
    249         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
    250         {
    251             this->color[i][offset[index]] = *(uint8_t*)&src[i];
    252         }
    253     }
    254 };
    255 
    256 #endif
    257 //////////////////////////////////////////////////////////////////////////
    258 /// @brief Computes lod offset for 1D surface at specified lod.
    259 /// @param baseWidth - width of basemip (mip 0).
    260 /// @param hAlign - horizontal alignment per miip, in texels
    261 /// @param lod - lod index
    262 /// @param offset - output offset.
    263 INLINE void ComputeLODOffset1D(
    264     const SWR_FORMAT_INFO& info,
    265     uint32_t baseWidth,
    266     uint32_t hAlign,
    267     uint32_t lod,
    268     uint32_t &offset)
    269 {
    270     if (lod == 0)
    271     {
    272         offset = 0;
    273     }
    274     else
    275     {
    276         uint32_t curWidth = baseWidth;
    277         // @note hAlign is already in blocks for compressed formats so upconvert
    278         //       so that we have the desired alignment post-divide.
    279         if (info.isBC)
    280         {
    281             hAlign *= info.bcWidth;
    282         }
    283 
    284         offset = GFX_ALIGN(curWidth, hAlign);
    285         for (uint32_t l = 1; l < lod; ++l)
    286         {
    287             curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
    288             offset += GFX_ALIGN(curWidth, hAlign);
    289         }
    290 
    291         if (info.isSubsampled || info.isBC)
    292         {
    293             offset /= info.bcWidth;
    294         }
    295     }
    296 }
    297 
    298 //////////////////////////////////////////////////////////////////////////
    299 /// @brief Computes x lod offset for 2D surface at specified lod.
    300 /// @param baseWidth - width of basemip (mip 0).
    301 /// @param hAlign - horizontal alignment per mip, in texels
    302 /// @param lod - lod index
    303 /// @param offset - output offset.
    304 INLINE void ComputeLODOffsetX(
    305     const SWR_FORMAT_INFO& info,
    306     uint32_t baseWidth,
    307     uint32_t hAlign,
    308     uint32_t lod,
    309     uint32_t &offset)
    310 {
    311     if (lod < 2)
    312     {
    313         offset = 0;
    314     }
    315     else
    316     {
    317         uint32_t curWidth = baseWidth;
    318         // @note hAlign is already in blocks for compressed formats so upconvert
    319         //       so that we have the desired alignment post-divide.
    320         if (info.isBC)
    321         {
    322             hAlign *= info.bcWidth;
    323         }
    324 
    325         curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
    326         curWidth = GFX_ALIGN(curWidth, hAlign);
    327 
    328         if (info.isSubsampled || info.isBC)
    329         {
    330             curWidth /= info.bcWidth;
    331         }
    332 
    333         offset = curWidth;
    334     }
    335 }
    336 
    337 //////////////////////////////////////////////////////////////////////////
    338 /// @brief Computes y lod offset for 2D surface at specified lod.
    339 /// @param baseWidth - width of basemip (mip 0).
    340 /// @param vAlign - vertical alignment per mip, in rows
    341 /// @param lod - lod index
    342 /// @param offset - output offset.
    343 INLINE void ComputeLODOffsetY(
    344     const SWR_FORMAT_INFO& info,
    345     uint32_t baseHeight,
    346     uint32_t vAlign,
    347     uint32_t lod,
    348     uint32_t &offset)
    349 {
    350     if (lod == 0)
    351     {
    352         offset = 0;
    353     }
    354     else
    355     {
    356         offset = 0;
    357         uint32_t mipHeight = baseHeight;
    358 
    359         // @note vAlign is already in blocks for compressed formats so upconvert
    360         //       so that we have the desired alignment post-divide.
    361         if (info.isBC)
    362         {
    363             vAlign *= info.bcHeight;
    364         }
    365 
    366         for (uint32_t l = 1; l <= lod; ++l)
    367         {
    368             uint32_t alignedMipHeight = GFX_ALIGN(mipHeight, vAlign);
    369             offset += ((l != 2) ? alignedMipHeight : 0);
    370             mipHeight = std::max<uint32_t>(mipHeight >> 1, 1U);
    371         }
    372 
    373         if (info.isBC)
    374         {
    375             offset /= info.bcHeight;
    376         }
    377     }
    378 }
    379 
    380 //////////////////////////////////////////////////////////////////////////
    381 /// @brief Computes 1D surface offset
    382 /// @param x - offset from start of array slice at given lod.
    383 /// @param array - array slice index
    384 /// @param lod - lod index
    385 /// @param pState - surface state
    386 /// @param xOffsetBytes - output offset in bytes.
    387 template<bool UseCachedOffsets>
    388 INLINE void ComputeSurfaceOffset1D(
    389     uint32_t x,
    390     uint32_t array,
    391     uint32_t lod,
    392     const SWR_SURFACE_STATE *pState,
    393     uint32_t &xOffsetBytes)
    394 {
    395     const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
    396     uint32_t lodOffset;
    397 
    398     if (UseCachedOffsets)
    399     {
    400         lodOffset = pState->lodOffsets[0][lod];
    401     }
    402     else
    403     {
    404         ComputeLODOffset1D(info, pState->width, pState->halign, lod, lodOffset);
    405     }
    406 
    407     xOffsetBytes = (array * pState->qpitch + lodOffset + x) * info.Bpp;
    408 }
    409 
    410 //////////////////////////////////////////////////////////////////////////
    411 /// @brief Adjusts the array slice for legacy TileY MSAA
    412 /// @param pState - surface state
    413 /// @param array - array slice index
    414 /// @param sampleNum - requested sample
    415 INLINE void AdjustCoordsForMSAA(const SWR_SURFACE_STATE *pState, uint32_t& x, uint32_t& y, uint32_t& arrayIndex, uint32_t sampleNum)
    416 {
    417     /// @todo: might want to templatize adjusting for sample slices when we support tileYS/tileYF.
    418     if((pState->tileMode == SWR_TILE_MODE_YMAJOR ||
    419         pState->tileMode == SWR_TILE_MODE_WMAJOR) &&
    420        pState->bInterleavedSamples)
    421     {
    422         uint32_t newX, newY, newSampleX, newSampleY;
    423         switch(pState->numSamples)
    424         {
    425         case 1:
    426             newX = x;
    427             newY = y;
    428             newSampleX = newSampleY = 0;
    429             break;
    430         case 2:
    431         {
    432             assert(pState->type == SURFACE_2D);
    433             static const uint32_t xMask = 0xFFFFFFFD;
    434             static const uint32_t sampleMaskX = 0x1;
    435             newX = pdep_u32(x, xMask);
    436             newY = y;
    437             newSampleX = pext_u32(sampleNum, sampleMaskX);
    438             newSampleY = 0;
    439         }
    440             break;
    441         case 4:
    442         {
    443             assert(pState->type == SURFACE_2D);
    444             static const uint32_t mask = 0xFFFFFFFD;
    445             static const uint32_t sampleMaskX = 0x1;
    446             static const uint32_t sampleMaskY = 0x2;
    447             newX = pdep_u32(x, mask);
    448             newY = pdep_u32(y, mask);
    449             newSampleX = pext_u32(sampleNum, sampleMaskX);
    450             newSampleY = pext_u32(sampleNum, sampleMaskY);
    451         }
    452             break;
    453         case 8:
    454         {
    455             assert(pState->type == SURFACE_2D);
    456             static const uint32_t xMask = 0xFFFFFFF9;
    457             static const uint32_t yMask = 0xFFFFFFFD;
    458             static const uint32_t sampleMaskX = 0x5;
    459             static const uint32_t sampleMaskY = 0x2;
    460             newX = pdep_u32(x, xMask);
    461             newY = pdep_u32(y, yMask);
    462             newSampleX = pext_u32(sampleNum, sampleMaskX);
    463             newSampleY = pext_u32(sampleNum, sampleMaskY);
    464         }
    465             break;
    466         case 16:
    467         {
    468             assert(pState->type == SURFACE_2D);
    469             static const uint32_t mask = 0xFFFFFFF9;
    470             static const uint32_t sampleMaskX = 0x5;
    471             static const uint32_t sampleMaskY = 0xA;
    472             newX = pdep_u32(x, mask);
    473             newY = pdep_u32(y, mask);
    474             newSampleX = pext_u32(sampleNum, sampleMaskX);
    475             newSampleY = pext_u32(sampleNum, sampleMaskY);
    476         }
    477             break;
    478         default:
    479             assert(0 && "Unsupported sample count");
    480             newX = newY = 0;
    481             newSampleX = newSampleY = 0;
    482             break;
    483         }
    484         x = newX | (newSampleX << 1);
    485         y = newY | (newSampleY << 1);
    486     }
    487     else if(pState->tileMode == SWR_TILE_MODE_YMAJOR ||
    488             pState->tileMode == SWR_TILE_NONE)
    489     {
    490         uint32_t sampleShift;
    491         switch(pState->numSamples)
    492         {
    493         case 1:
    494             assert(sampleNum == 0);
    495             sampleShift = 0;
    496             break;
    497         case 2:
    498             assert(pState->type == SURFACE_2D);
    499             sampleShift = 1;
    500             break;
    501         case 4:
    502             assert(pState->type == SURFACE_2D);
    503             sampleShift = 2;
    504             break;
    505         case 8:
    506             assert(pState->type == SURFACE_2D);
    507             sampleShift = 3;
    508             break;
    509         case 16:
    510             assert(pState->type == SURFACE_2D);
    511             sampleShift = 4;
    512             break;
    513         default:
    514             assert(0 && "Unsupported sample count");
    515             sampleShift = 0;
    516             break;
    517         }
    518         arrayIndex = (arrayIndex << sampleShift) | sampleNum;
    519     }
    520 }
    521 
    522 //////////////////////////////////////////////////////////////////////////
    523 /// @brief Computes 2D surface offset
    524 /// @param x - horizontal offset from start of array slice and lod.
    525 /// @param y - vertical offset from start of array slice and lod.
    526 /// @param array - array slice index
    527 /// @param lod - lod index
    528 /// @param pState - surface state
    529 /// @param xOffsetBytes - output x offset in bytes.
    530 /// @param yOffsetRows - output y offset in bytes.
    531 template<bool UseCachedOffsets>
    532 INLINE void ComputeSurfaceOffset2D(uint32_t x, uint32_t y, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows)
    533 {
    534     const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
    535     uint32_t lodOffsetX, lodOffsetY;
    536 
    537     if (UseCachedOffsets)
    538     {
    539         lodOffsetX = pState->lodOffsets[0][lod];
    540         lodOffsetY = pState->lodOffsets[1][lod];
    541     }
    542     else
    543     {
    544         ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
    545         ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
    546     }
    547 
    548     AdjustCoordsForMSAA(pState, x, y, array, sampleNum);
    549     xOffsetBytes = (x + lodOffsetX + pState->xOffset) * info.Bpp;
    550     yOffsetRows = (array * pState->qpitch) + lodOffsetY + y + pState->yOffset;
    551 }
    552 
    553 //////////////////////////////////////////////////////////////////////////
    554 /// @brief Computes 3D surface offset
    555 /// @param x - horizontal offset from start of array slice and lod.
    556 /// @param y - vertical offset from start of array slice and lod.
    557 /// @param z - depth offset from start of array slice and lod.
    558 /// @param lod - lod index
    559 /// @param pState - surface state
    560 /// @param xOffsetBytes - output x offset in bytes.
    561 /// @param yOffsetRows - output y offset in rows.
    562 /// @param zOffsetSlices - output y offset in slices.
    563 template<bool UseCachedOffsets>
    564 INLINE void ComputeSurfaceOffset3D(uint32_t x, uint32_t y, uint32_t z, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows, uint32_t &zOffsetSlices)
    565 {
    566     const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
    567     uint32_t lodOffsetX, lodOffsetY;
    568 
    569     if (UseCachedOffsets)
    570     {
    571         lodOffsetX = pState->lodOffsets[0][lod];
    572         lodOffsetY = pState->lodOffsets[1][lod];
    573     }
    574     else
    575     {
    576         ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
    577         ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
    578     }
    579 
    580     xOffsetBytes = (x + lodOffsetX) * info.Bpp;
    581     yOffsetRows = lodOffsetY + y;
    582     zOffsetSlices = z;
    583 }
    584 
    585 //////////////////////////////////////////////////////////////////////////
    586 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
    587 ///        and returns final surface address
    588 /// @param xOffsetBytes - x offset from base of surface in bytes
    589 /// @param yOffsetRows - y offset from base of surface in rows
    590 /// @param pState - pointer to the surface state
    591 template<typename TTraits>
    592 INLINE uint32_t ComputeTileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
    593 {
    594     return ComputeOffset2D<TTraits>(pState->pitch, xOffsetBytes, yOffsetRows);
    595 }
    596 
    597 //////////////////////////////////////////////////////////////////////////
    598 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
    599 ///        and returns final surface address
    600 /// @param xOffsetBytes - x offset from base of surface in bytes
    601 /// @param yOffsetRows - y offset from base of surface in rows
    602 /// @param pState - pointer to the surface state
    603 template<typename TTraits>
    604 INLINE uint32_t ComputeTileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
    605 {
    606     return ComputeOffset3D<TTraits>(pState->qpitch, pState->pitch, xOffsetBytes, yOffsetRows, zOffsetSlices);
    607 }
    608 
    609 //////////////////////////////////////////////////////////////////////////
    610 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
    611 ///        and returns final surface address
    612 /// @param xOffsetBytes - x offset from base of surface in bytes
    613 /// @param yOffsetRows - y offset from base of surface in rows
    614 /// @param pState - pointer to the surface state
    615 INLINE
    616 uint32_t TileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
    617 {
    618     switch (pState->tileMode)
    619     {
    620     case SWR_TILE_NONE: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, pState);
    621     case SWR_TILE_SWRZ: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, pState);
    622     case SWR_TILE_MODE_XMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_XMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
    623     case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, pState);
    624     case SWR_TILE_MODE_WMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_WMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
    625     default: SWR_INVALID("Unsupported tiling mode");
    626     }
    627     return 0;
    628 }
    629 
    630 //////////////////////////////////////////////////////////////////////////
    631 /// @brief Swizzles the linear x,y,z offsets depending on surface tiling mode
    632 ///        and returns final surface address
    633 /// @param xOffsetBytes - x offset from base of surface in bytes
    634 /// @param yOffsetRows - y offset from base of surface in rows
    635 /// @param zOffsetSlices - z offset from base of surface in slices
    636 /// @param pState - pointer to the surface state
    637 INLINE
    638 uint32_t TileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
    639 {
    640     switch (pState->tileMode)
    641     {
    642     case SWR_TILE_NONE: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
    643     case SWR_TILE_SWRZ: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
    644     case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
    645     default: SWR_INVALID("Unsupported tiling mode");
    646     }
    647     return 0;
    648 }
    649 
    650 template<bool UseCachedOffsets>
    651 INLINE
    652 uint32_t ComputeSurfaceOffset(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
    653 {
    654     uint32_t offsetX = 0, offsetY = 0, offsetZ = 0;
    655     switch (pState->type)
    656     {
    657     case SURFACE_BUFFER:
    658     case SURFACE_STRUCTURED_BUFFER:
    659         offsetX = x * pState->pitch;
    660         return offsetX;
    661         break;
    662     case SURFACE_1D:
    663         ComputeSurfaceOffset1D<UseCachedOffsets>(x, array, lod, pState, offsetX);
    664         return TileSwizzle2D(offsetX, 0, pState);
    665         break;
    666     case SURFACE_2D:
    667         ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
    668         return TileSwizzle2D(offsetX, offsetY, pState);
    669     case SURFACE_3D:
    670         ComputeSurfaceOffset3D<UseCachedOffsets>(x, y, z, lod, pState, offsetX, offsetY, offsetZ);
    671         return TileSwizzle3D(offsetX, offsetY, offsetZ, pState);
    672         break;
    673     case SURFACE_CUBE:
    674         ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
    675         return TileSwizzle2D(offsetX, offsetY, pState);
    676         break;
    677     default: SWR_INVALID("Unsupported format");
    678     }
    679 
    680     return 0;
    681 }
    682 
    683 typedef void*(*PFN_COMPUTESURFADDR)(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, const SWR_SURFACE_STATE*);
    684 
    685 //////////////////////////////////////////////////////////////////////////
    686 /// @brief Computes surface address at the given location and lod
    687 /// @param x - x location in pixels
    688 /// @param y - y location in rows
    689 /// @param z - z location for 3D surfaces
    690 /// @param array - array slice for 1D and 2D surfaces
    691 /// @param lod - level of detail
    692 /// @param pState - pointer to the surface state
    693 template<bool UseCachedOffsets, bool IsRead>
    694 INLINE
    695 void* ComputeSurfaceAddress(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
    696 {
    697     return (void*)(pState->xpBaseAddress + ComputeSurfaceOffset<UseCachedOffsets>(x, y, z, array, sampleNum, lod, pState));
    698 }
    699