Home | History | Annotate | Download | only in core
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file backend.cpp
     24 *
     25 * @brief Backend handles rasterization, pixel shading and output merger
     26 *        operations.
     27 *
     28 ******************************************************************************/
     29 
     30 #include <smmintrin.h>
     31 
     32 #include "backend.h"
     33 #include "backend_impl.h"
     34 #include "tilemgr.h"
     35 #include "memory/tilingtraits.h"
     36 #include "core/multisample.h"
     37 
     38 #include <algorithm>
     39 
     40 template<SWR_FORMAT format>
     41 void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
     42 {
     43     auto lambda = [&](int32_t comp)
     44     {
     45         FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
     46 
     47         pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
     48     };
     49 
     50     const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM);
     51 
     52     for (uint32_t i = 0; i < numIter; ++i)
     53     {
     54         UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
     55     }
     56 }
     57 
     58 #if USE_8x2_TILE_BACKEND
     59 template<SWR_FORMAT format>
     60 void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
     61 {
     62     auto lambda = [&](int32_t comp)
     63     {
     64         FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
     65 
     66         pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
     67     };
     68 
     69     const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
     70 
     71     for (uint32_t i = 0; i < numIter; ++i)
     72     {
     73         UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
     74     }
     75 }
     76 
     77 #endif
     78 template<SWR_FORMAT format>
     79 INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
     80 {
     81     // convert clear color to hottile format
     82     // clear color is in RGBA float/uint32
     83 #if USE_8x2_TILE_BACKEND
     84     simd16vector vClear;
     85     for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
     86     {
     87         simd16scalar vComp;
     88         vComp = _simd16_load1_ps((const float*)&clear[comp]);
     89         if (FormatTraits<format>::isNormalized(comp))
     90         {
     91             vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
     92             vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
     93         }
     94         vComp = FormatTraits<format>::pack(comp, vComp);
     95         vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
     96     }
     97 
     98 #else
     99     simdvector vClear;
    100     for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
    101     {
    102         simdscalar vComp;
    103         vComp = _simd_load1_ps((const float*)&clear[comp]);
    104         if (FormatTraits<format>::isNormalized(comp))
    105         {
    106             vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<format>::fromFloat(comp)));
    107             vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
    108         }
    109         vComp = FormatTraits<format>::pack(comp, vComp);
    110         vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
    111     }
    112 
    113 #endif
    114     uint32_t tileX, tileY;
    115     MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
    116 
    117     // Init to full macrotile
    118     SWR_RECT clearTile =
    119     {
    120         KNOB_MACROTILE_X_DIM * int32_t(tileX),
    121         KNOB_MACROTILE_Y_DIM * int32_t(tileY),
    122         KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
    123         KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
    124     };
    125 
    126     // intersect with clear rect
    127     clearTile &= rect;
    128 
    129     // translate to local hottile origin
    130     clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
    131 
    132     // Make maximums inclusive (needed for convert to raster tiles)
    133     clearTile.xmax -= 1;
    134     clearTile.ymax -= 1;
    135 
    136     // convert to raster tiles
    137     clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
    138     clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
    139     clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
    140     clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
    141 
    142     const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
    143     // compute steps between raster tile samples / raster tiles / macro tile rows
    144     const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
    145     const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
    146     const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
    147     const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
    148 
    149     HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex);
    150     uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
    151     uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
    152 
    153     // loop over all raster tiles in the current hot tile
    154     for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
    155     {
    156         uint8_t* pRasterTile = pRasterTileRow;
    157         for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
    158         {
    159             for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
    160             {
    161                 ClearRasterTile<format>(pRasterTile, vClear);
    162                 pRasterTile += rasterTileSampleStep;
    163             }
    164         }
    165         pRasterTileRow += macroTileRowStep;
    166     }
    167 
    168     pHotTile->state = HOTTILE_DIRTY;
    169 }
    170 
    171 
    172 void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
    173 {
    174     SWR_CONTEXT *pContext = pDC->pContext;
    175 
    176     if (KNOB_FAST_CLEAR)
    177     {
    178         CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
    179         SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
    180         uint32_t numSamples = GetNumSamples(sampleCount);
    181 
    182         SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
    183 
    184         AR_BEGIN(BEClear, pDC->drawId);
    185 
    186         if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
    187         {
    188             unsigned long rt = 0;
    189             uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
    190             while (_BitScanForward(&rt, mask))
    191             {
    192                 mask &= ~(1 << rt);
    193 
    194                 HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
    195 
    196                 // All we want to do here is to mark the hot tile as being in a "needs clear" state.
    197                 pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
    198                 pHotTile->clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
    199                 pHotTile->clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
    200                 pHotTile->clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
    201                 pHotTile->state = HOTTILE_CLEAR;
    202             }
    203         }
    204 
    205         if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
    206         {
    207             HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
    208             pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
    209             pHotTile->state = HOTTILE_CLEAR;
    210         }
    211 
    212         if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
    213         {
    214             HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
    215 
    216             pHotTile->clearData[0] = pClear->clearStencil;
    217             pHotTile->state = HOTTILE_CLEAR;
    218         }
    219 
    220         AR_END(BEClear, 1);
    221     }
    222     else
    223     {
    224         // Legacy clear
    225         CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
    226         AR_BEGIN(BEClear, pDC->drawId);
    227 
    228         if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
    229         {
    230             DWORD clearData[4];
    231             clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
    232             clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
    233             clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
    234             clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
    235 
    236             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
    237             SWR_ASSERT(pfnClearTiles != nullptr);
    238 
    239             unsigned long rt = 0;
    240             uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
    241             while (_BitScanForward(&rt, mask))
    242             {
    243                 mask &= ~(1 << rt);
    244 
    245                 pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
    246             }
    247         }
    248 
    249         if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
    250         {
    251             DWORD clearData[4];
    252             clearData[0] = *(DWORD*)&pClear->clearDepth;
    253             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
    254             SWR_ASSERT(pfnClearTiles != nullptr);
    255 
    256             pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
    257         }
    258 
    259         if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
    260         {
    261             DWORD clearData[4];
    262             clearData[0] = pClear->clearStencil;
    263             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
    264 
    265             pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
    266         }
    267 
    268         AR_END(BEClear, 1);
    269     }
    270 }
    271 
    272 void InitClearTilesTable()
    273 {
    274     memset(gClearTilesTable, 0, sizeof(gClearTilesTable));
    275 
    276     gClearTilesTable[R8G8B8A8_UNORM]        = ClearMacroTile<R8G8B8A8_UNORM>;
    277     gClearTilesTable[B8G8R8A8_UNORM]        = ClearMacroTile<B8G8R8A8_UNORM>;
    278     gClearTilesTable[R32_FLOAT]             = ClearMacroTile<R32_FLOAT>;
    279     gClearTilesTable[R32G32B32A32_FLOAT]    = ClearMacroTile<R32G32B32A32_FLOAT>;
    280     gClearTilesTable[R8_UINT]               = ClearMacroTile<R8_UINT>;
    281 }
    282