Home | History | Annotate | Download | only in core
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file tilemgr.cpp
     24 *
     25 * @brief Implementation for Macro Tile Manager which provides the facilities
     26 *        for threads to work on an macro tile.
     27 *
     28 ******************************************************************************/
     29 #include <unordered_map>
     30 
     31 #include "fifo.hpp"
     32 #include "core/tilemgr.h"
     33 #include "core/multisample.h"
     34 #include "rdtsc_core.h"
     35 
     36 #define TILE_ID(x,y) ((x << 16 | y))
     37 
     38 MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
     39 {
     40 }
     41 
     42 void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
     43 {
     44     // Should not enqueue more then what we have backing for in the hot tile manager.
     45     SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
     46     SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
     47 
     48     if ((x & ~(KNOB_NUM_HOT_TILES_X-1)) | (y & ~(KNOB_NUM_HOT_TILES_Y-1)))
     49     {
     50         return;
     51     }
     52 
     53     uint32_t id = TILE_ID(x, y);
     54 
     55     MacroTileQueue &tile = mTiles[id];
     56     tile.mWorkItemsFE++;
     57     tile.mId = id;
     58 
     59     if (tile.mWorkItemsFE == 1)
     60     {
     61         tile.clear(mArena);
     62         mDirtyTiles.push_back(&tile);
     63     }
     64 
     65     mWorkItemsProduced++;
     66     tile.enqueue_try_nosync(mArena, pWork);
     67 }
     68 
     69 void MacroTileMgr::markTileComplete(uint32_t id)
     70 {
     71     SWR_ASSERT(mTiles.find(id) != mTiles.end());
     72     MacroTileQueue &tile = mTiles[id];
     73     uint32_t numTiles = tile.mWorkItemsFE;
     74     InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
     75 
     76     _ReadWriteBarrier();
     77     tile.mWorkItemsBE += numTiles;
     78     SWR_ASSERT(tile.mWorkItemsFE == tile.mWorkItemsBE);
     79 
     80     // clear out tile, but defer fifo clear until the next DC first queues to it.
     81     // this prevents worker threads from constantly locking a completed macro tile
     82     tile.mWorkItemsFE = 0;
     83     tile.mWorkItemsBE = 0;
     84 }
     85 
     86 HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
     87     uint32_t renderTargetArrayIndex)
     88 {
     89     uint32_t x, y;
     90     MacroTileMgr::getTileIndices(macroID, x, y);
     91 
     92     SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
     93     SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
     94 
     95     HotTileSet &tile = mHotTiles[x][y];
     96     HOTTILE& hotTile = tile.Attachment[attachment];
     97     if (hotTile.pBuffer == NULL)
     98     {
     99         if (create)
    100         {
    101             uint32_t size = numSamples * mHotTileSize[attachment];
    102             uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
    103             hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, KNOB_SIMD_WIDTH * 4, numaNode);
    104             hotTile.state = HOTTILE_INVALID;
    105             hotTile.numSamples = numSamples;
    106             hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
    107         }
    108         else
    109         {
    110             return NULL;
    111         }
    112     }
    113     else
    114     {
    115         // free the old tile and create a new one with enough space to hold all samples
    116         if (numSamples > hotTile.numSamples)
    117         {
    118             // tile should be either uninitialized or resolved if we're deleting and switching to a
    119             // new sample count
    120             SWR_ASSERT((hotTile.state == HOTTILE_INVALID) ||
    121                 (hotTile.state == HOTTILE_RESOLVED) ||
    122                 (hotTile.state == HOTTILE_CLEAR));
    123             FreeHotTileMem(hotTile.pBuffer);
    124 
    125             uint32_t size = numSamples * mHotTileSize[attachment];
    126             uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
    127             hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, KNOB_SIMD_WIDTH * 4, numaNode);
    128             hotTile.state = HOTTILE_INVALID;
    129             hotTile.numSamples = numSamples;
    130         }
    131 
    132         // if requested render target array index isn't currently loaded, need to store out the current hottile
    133         // and load the requested array slice
    134         if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
    135         {
    136             SWR_FORMAT format;
    137             switch (attachment)
    138             {
    139             case SWR_ATTACHMENT_COLOR0:
    140             case SWR_ATTACHMENT_COLOR1:
    141             case SWR_ATTACHMENT_COLOR2:
    142             case SWR_ATTACHMENT_COLOR3:
    143             case SWR_ATTACHMENT_COLOR4:
    144             case SWR_ATTACHMENT_COLOR5:
    145             case SWR_ATTACHMENT_COLOR6:
    146             case SWR_ATTACHMENT_COLOR7: format = KNOB_COLOR_HOT_TILE_FORMAT; break;
    147             case SWR_ATTACHMENT_DEPTH: format = KNOB_DEPTH_HOT_TILE_FORMAT; break;
    148             case SWR_ATTACHMENT_STENCIL: format = KNOB_STENCIL_HOT_TILE_FORMAT; break;
    149             default: SWR_ASSERT(false, "Unknown attachment: %d", attachment); format = KNOB_COLOR_HOT_TILE_FORMAT; break;
    150             }
    151 
    152             if (hotTile.state == HOTTILE_CLEAR)
    153             {
    154                 if (attachment == SWR_ATTACHMENT_STENCIL)
    155                     ClearStencilHotTile(&hotTile);
    156                 else if (attachment == SWR_ATTACHMENT_DEPTH)
    157                     ClearDepthHotTile(&hotTile);
    158                 else
    159                     ClearColorHotTile(&hotTile);
    160 
    161                 hotTile.state = HOTTILE_DIRTY;
    162             }
    163 
    164             if (hotTile.state == HOTTILE_DIRTY)
    165             {
    166                 pContext->pfnStoreTile(GetPrivateState(pDC), format, attachment,
    167                     x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
    168             }
    169 
    170             pContext->pfnLoadTile(GetPrivateState(pDC), format, attachment,
    171                 x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
    172 
    173             hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
    174             hotTile.state = HOTTILE_DIRTY;
    175         }
    176     }
    177     return &tile.Attachment[attachment];
    178 }
    179 
    180 HOTTILE* HotTileMgr::GetHotTileNoLoad(
    181     SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID,
    182     SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples)
    183 {
    184     uint32_t x, y;
    185     MacroTileMgr::getTileIndices(macroID, x, y);
    186 
    187     SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
    188     SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
    189 
    190     HotTileSet &tile = mHotTiles[x][y];
    191     HOTTILE& hotTile = tile.Attachment[attachment];
    192     if (hotTile.pBuffer == NULL)
    193     {
    194         if (create)
    195         {
    196             uint32_t size = numSamples * mHotTileSize[attachment];
    197             hotTile.pBuffer = (uint8_t*)AlignedMalloc(size, KNOB_SIMD_WIDTH * 4);
    198             hotTile.state = HOTTILE_INVALID;
    199             hotTile.numSamples = numSamples;
    200             hotTile.renderTargetArrayIndex = 0;
    201         }
    202         else
    203         {
    204             return NULL;
    205         }
    206     }
    207 
    208     return &hotTile;
    209 }
    210 
    211 #if USE_8x2_TILE_BACKEND
    212 void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile)  // clear a macro tile from float4 clear data.
    213 {
    214     // Load clear color into SIMD register...
    215     float *pClearData = (float *)(pHotTile->clearData);
    216     simd16scalar valR = _simd16_broadcast_ss(&pClearData[0]);
    217     simd16scalar valG = _simd16_broadcast_ss(&pClearData[1]);
    218     simd16scalar valB = _simd16_broadcast_ss(&pClearData[2]);
    219     simd16scalar valA = _simd16_broadcast_ss(&pClearData[3]);
    220 
    221     float *pfBuf = (float *)pHotTile->pBuffer;
    222     uint32_t numSamples = pHotTile->numSamples;
    223 
    224     for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
    225     {
    226         for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
    227         {
    228             for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
    229             {
    230                 _simd16_store_ps(pfBuf, valR);
    231                 pfBuf += KNOB_SIMD16_WIDTH;
    232 
    233                 _simd16_store_ps(pfBuf, valG);
    234                 pfBuf += KNOB_SIMD16_WIDTH;
    235 
    236                 _simd16_store_ps(pfBuf, valB);
    237                 pfBuf += KNOB_SIMD16_WIDTH;
    238 
    239                 _simd16_store_ps(pfBuf, valA);
    240                 pfBuf += KNOB_SIMD16_WIDTH;
    241             }
    242         }
    243     }
    244 }
    245 
    246 void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile)  // clear a macro tile from float4 clear data.
    247 {
    248     // Load clear color into SIMD register...
    249     float *pClearData = (float *)(pHotTile->clearData);
    250     simd16scalar valZ = _simd16_broadcast_ss(&pClearData[0]);
    251 
    252     float *pfBuf = (float *)pHotTile->pBuffer;
    253     uint32_t numSamples = pHotTile->numSamples;
    254 
    255     for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
    256     {
    257         for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
    258         {
    259             for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
    260             {
    261                 _simd16_store_ps(pfBuf, valZ);
    262                 pfBuf += KNOB_SIMD16_WIDTH;
    263             }
    264         }
    265     }
    266 }
    267 
    268 void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
    269 {
    270     // convert from F32 to U8.
    271     uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
    272     //broadcast 32x into __m256i...
    273     simd16scalari valS = _simd16_set1_epi8(clearVal);
    274 
    275     simd16scalari *pBuf = (simd16scalari *)pHotTile->pBuffer;
    276     uint32_t numSamples = pHotTile->numSamples;
    277 
    278     for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
    279     {
    280         for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
    281         {
    282             // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
    283             for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM * 4)
    284             {
    285                 _simd16_store_si(pBuf, valS);
    286                 pBuf += 1;
    287             }
    288         }
    289     }
    290 }
    291 
    292 #else
    293 void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile)  // clear a macro tile from float4 clear data.
    294 {
    295     // Load clear color into SIMD register...
    296     float *pClearData = (float*)(pHotTile->clearData);
    297     simdscalar valR = _simd_broadcast_ss(&pClearData[0]);
    298     simdscalar valG = _simd_broadcast_ss(&pClearData[1]);
    299     simdscalar valB = _simd_broadcast_ss(&pClearData[2]);
    300     simdscalar valA = _simd_broadcast_ss(&pClearData[3]);
    301 
    302     float *pfBuf = (float*)pHotTile->pBuffer;
    303     uint32_t numSamples = pHotTile->numSamples;
    304 
    305     for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
    306     {
    307         for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
    308         {
    309             for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM) //SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); si++)
    310             {
    311                 _simd_store_ps(pfBuf, valR);
    312                 pfBuf += KNOB_SIMD_WIDTH;
    313                 _simd_store_ps(pfBuf, valG);
    314                 pfBuf += KNOB_SIMD_WIDTH;
    315                 _simd_store_ps(pfBuf, valB);
    316                 pfBuf += KNOB_SIMD_WIDTH;
    317                 _simd_store_ps(pfBuf, valA);
    318                 pfBuf += KNOB_SIMD_WIDTH;
    319             }
    320         }
    321     }
    322 }
    323 
    324 void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile)  // clear a macro tile from float4 clear data.
    325 {
    326     // Load clear color into SIMD register...
    327     float *pClearData = (float*)(pHotTile->clearData);
    328     simdscalar valZ = _simd_broadcast_ss(&pClearData[0]);
    329 
    330     float *pfBuf = (float*)pHotTile->pBuffer;
    331     uint32_t numSamples = pHotTile->numSamples;
    332 
    333     for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
    334     {
    335         for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
    336         {
    337             for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM)
    338             {
    339                 _simd_store_ps(pfBuf, valZ);
    340                 pfBuf += KNOB_SIMD_WIDTH;
    341             }
    342         }
    343     }
    344 }
    345 
    346 void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
    347 {
    348     // convert from F32 to U8.
    349     uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
    350     //broadcast 32x into __m256i...
    351     simdscalari valS = _simd_set1_epi8(clearVal);
    352 
    353     simdscalari* pBuf = (simdscalari*)pHotTile->pBuffer;
    354     uint32_t numSamples = pHotTile->numSamples;
    355 
    356     for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
    357     {
    358         for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
    359         {
    360             // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
    361             for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM * 4)
    362             {
    363                 _simd_store_si(pBuf, valS);
    364                 pBuf += 1;
    365             }
    366         }
    367     }
    368 }
    369 
    370 #endif
    371 //////////////////////////////////////////////////////////////////////////
    372 /// @brief InitializeHotTiles
    373 /// for draw calls, we initialize the active hot tiles and perform deferred
    374 /// load on them if tile is in invalid state. we do this in the outer thread
    375 /// loop instead of inside the draw routine itself mainly for performance,
    376 /// to avoid unnecessary setup every triangle
    377 /// @todo support deferred clear
    378 /// @param pCreateInfo - pointer to creation info.
    379 void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
    380 {
    381     const API_STATE& state = GetApiState(pDC);
    382 
    383     uint32_t x, y;
    384     MacroTileMgr::getTileIndices(macroID, x, y);
    385     x *= KNOB_MACROTILE_X_DIM;
    386     y *= KNOB_MACROTILE_Y_DIM;
    387 
    388     uint32_t numSamples = GetNumSamples(state.rastState.sampleCount);
    389 
    390     // check RT if enabled
    391     unsigned long rtSlot = 0;
    392     uint32_t colorHottileEnableMask = state.colorHottileEnable;
    393     while (_BitScanForward(&rtSlot, colorHottileEnableMask))
    394     {
    395         HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
    396 
    397         if (pHotTile->state == HOTTILE_INVALID)
    398         {
    399             AR_BEGIN(BELoadTiles, pDC->drawId);
    400             // invalid hottile before draw requires a load from surface before we can draw to it
    401             pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
    402             pHotTile->state = HOTTILE_DIRTY;
    403             AR_END(BELoadTiles, 0);
    404         }
    405         else if (pHotTile->state == HOTTILE_CLEAR)
    406         {
    407             AR_BEGIN(BELoadTiles, pDC->drawId);
    408             // Clear the tile.
    409             ClearColorHotTile(pHotTile);
    410             pHotTile->state = HOTTILE_DIRTY;
    411             AR_END(BELoadTiles, 0);
    412         }
    413         colorHottileEnableMask &= ~(1 << rtSlot);
    414     }
    415 
    416     // check depth if enabled
    417     if (state.depthHottileEnable)
    418     {
    419         HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
    420         if (pHotTile->state == HOTTILE_INVALID)
    421         {
    422             AR_BEGIN(BELoadTiles, pDC->drawId);
    423             // invalid hottile before draw requires a load from surface before we can draw to it
    424             pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
    425             pHotTile->state = HOTTILE_DIRTY;
    426             AR_END(BELoadTiles, 0);
    427         }
    428         else if (pHotTile->state == HOTTILE_CLEAR)
    429         {
    430             AR_BEGIN(BELoadTiles, pDC->drawId);
    431             // Clear the tile.
    432             ClearDepthHotTile(pHotTile);
    433             pHotTile->state = HOTTILE_DIRTY;
    434             AR_END(BELoadTiles, 0);
    435         }
    436     }
    437 
    438     // check stencil if enabled
    439     if (state.stencilHottileEnable)
    440     {
    441         HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
    442         if (pHotTile->state == HOTTILE_INVALID)
    443         {
    444             AR_BEGIN(BELoadTiles, pDC->drawId);
    445             // invalid hottile before draw requires a load from surface before we can draw to it
    446             pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
    447             pHotTile->state = HOTTILE_DIRTY;
    448             AR_END(BELoadTiles, 0);
    449         }
    450         else if (pHotTile->state == HOTTILE_CLEAR)
    451         {
    452             AR_BEGIN(BELoadTiles, pDC->drawId);
    453             // Clear the tile.
    454             ClearStencilHotTile(pHotTile);
    455             pHotTile->state = HOTTILE_DIRTY;
    456             AR_END(BELoadTiles, 0);
    457         }
    458     }
    459 }
    460