1 /**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file backend.cpp 24 * 25 * @brief Backend handles rasterization, pixel shading and output merger 26 * operations. 27 * 28 ******************************************************************************/ 29 30 #include <smmintrin.h> 31 32 #include "backend.h" 33 #include "backend_impl.h" 34 #include "tilemgr.h" 35 #include "memory/tilingtraits.h" 36 #include "core/multisample.h" 37 38 #include <algorithm> 39 40 template<SWR_FORMAT format> 41 void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value) 42 { 43 auto lambda = [&](int32_t comp) 44 { 45 FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]); 46 47 pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8); 48 }; 49 50 const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM); 51 52 for (uint32_t i = 0; i < numIter; ++i) 53 { 54 UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda); 55 } 56 } 57 58 #if USE_8x2_TILE_BACKEND 59 template<SWR_FORMAT format> 60 void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value) 61 { 62 auto lambda = [&](int32_t comp) 63 { 64 FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]); 65 66 pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8); 67 }; 68 69 const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM); 70 71 for (uint32_t i = 0; i < numIter; ++i) 72 { 73 UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda); 74 } 75 } 76 77 #endif 78 template<SWR_FORMAT format> 79 INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect) 80 { 81 // convert clear color to hottile format 82 // clear color is in RGBA float/uint32 83 #if USE_8x2_TILE_BACKEND 84 simd16vector vClear; 85 for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp) 86 { 87 simd16scalar vComp; 88 vComp = _simd16_load1_ps((const float*)&clear[comp]); 89 if (FormatTraits<format>::isNormalized(comp)) 90 { 91 vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp))); 92 vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp)); 93 } 94 vComp = FormatTraits<format>::pack(comp, vComp); 95 vClear.v[FormatTraits<format>::swizzle(comp)] = vComp; 96 } 97 98 #else 99 simdvector vClear; 100 for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp) 101 { 102 simdscalar vComp; 103 vComp = _simd_load1_ps((const float*)&clear[comp]); 104 if (FormatTraits<format>::isNormalized(comp)) 105 { 106 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<format>::fromFloat(comp))); 107 vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp)); 108 } 109 vComp = FormatTraits<format>::pack(comp, vComp); 110 vClear.v[FormatTraits<format>::swizzle(comp)] = vComp; 111 } 112 113 #endif 114 uint32_t tileX, tileY; 115 MacroTileMgr::getTileIndices(macroTile, tileX, tileY); 116 117 // Init to full macrotile 118 SWR_RECT clearTile = 119 { 120 KNOB_MACROTILE_X_DIM * int32_t(tileX), 121 KNOB_MACROTILE_Y_DIM * int32_t(tileY), 122 KNOB_MACROTILE_X_DIM * int32_t(tileX + 1), 123 KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1), 124 }; 125 126 // intersect with clear rect 127 clearTile &= rect; 128 129 // translate to local hottile origin 130 clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM); 131 132 // Make maximums inclusive (needed for convert to raster tiles) 133 clearTile.xmax -= 1; 134 clearTile.ymax -= 1; 135 136 // convert to raster tiles 137 clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT); 138 clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT); 139 clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT); 140 clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT); 141 142 const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount); 143 // compute steps between raster tile samples / raster tiles / macro tile rows 144 const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8; 145 const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples; 146 const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep; 147 const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8); 148 149 HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex); 150 uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples; 151 uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples; 152 153 // loop over all raster tiles in the current hot tile 154 for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y) 155 { 156 uint8_t* pRasterTile = pRasterTileRow; 157 for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x) 158 { 159 for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++) 160 { 161 ClearRasterTile<format>(pRasterTile, vClear); 162 pRasterTile += rasterTileSampleStep; 163 } 164 } 165 pRasterTileRow += macroTileRowStep; 166 } 167 168 pHotTile->state = HOTTILE_DIRTY; 169 } 170 171 172 void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData) 173 { 174 SWR_CONTEXT *pContext = pDC->pContext; 175 176 if (KNOB_FAST_CLEAR) 177 { 178 CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData; 179 SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount; 180 uint32_t numSamples = GetNumSamples(sampleCount); 181 182 SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason. 183 184 AR_BEGIN(BEClear, pDC->drawId); 185 186 if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR) 187 { 188 unsigned long rt = 0; 189 uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR; 190 while (_BitScanForward(&rt, mask)) 191 { 192 mask &= ~(1 << rt); 193 194 HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex); 195 196 // All we want to do here is to mark the hot tile as being in a "needs clear" state. 197 pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]); 198 pHotTile->clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]); 199 pHotTile->clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]); 200 pHotTile->clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]); 201 pHotTile->state = HOTTILE_CLEAR; 202 } 203 } 204 205 if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT) 206 { 207 HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex); 208 pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth; 209 pHotTile->state = HOTTILE_CLEAR; 210 } 211 212 if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT) 213 { 214 HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex); 215 216 pHotTile->clearData[0] = pClear->clearStencil; 217 pHotTile->state = HOTTILE_CLEAR; 218 } 219 220 AR_END(BEClear, 1); 221 } 222 else 223 { 224 // Legacy clear 225 CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData; 226 AR_BEGIN(BEClear, pDC->drawId); 227 228 if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR) 229 { 230 DWORD clearData[4]; 231 clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]); 232 clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]); 233 clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]); 234 clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]); 235 236 PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT]; 237 SWR_ASSERT(pfnClearTiles != nullptr); 238 239 unsigned long rt = 0; 240 uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR; 241 while (_BitScanForward(&rt, mask)) 242 { 243 mask &= ~(1 << rt); 244 245 pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); 246 } 247 } 248 249 if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT) 250 { 251 DWORD clearData[4]; 252 clearData[0] = *(DWORD*)&pClear->clearDepth; 253 PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT]; 254 SWR_ASSERT(pfnClearTiles != nullptr); 255 256 pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); 257 } 258 259 if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT) 260 { 261 DWORD clearData[4]; 262 clearData[0] = pClear->clearStencil; 263 PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT]; 264 265 pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); 266 } 267 268 AR_END(BEClear, 1); 269 } 270 } 271 272 void InitClearTilesTable() 273 { 274 memset(gClearTilesTable, 0, sizeof(gClearTilesTable)); 275 276 gClearTilesTable[R8G8B8A8_UNORM] = ClearMacroTile<R8G8B8A8_UNORM>; 277 gClearTilesTable[B8G8R8A8_UNORM] = ClearMacroTile<B8G8R8A8_UNORM>; 278 gClearTilesTable[R32_FLOAT] = ClearMacroTile<R32_FLOAT>; 279 gClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>; 280 gClearTilesTable[R8_UINT] = ClearMacroTile<R8_UINT>; 281 } 282