/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file tilemgr.h
*
* @brief Definitions for Macro Tile Manager which provides the facilities
*        for threads to work on a macro tile.
*
******************************************************************************/
#pragma once

#include <set>
#include <unordered_map>
#include "common/formats.h"
#include "fifo.hpp"
#include "context.h"
#include "format_traits.h"

//////////////////////////////////////////////////////////////////////////
/// MacroTile - work queue for a tile.
//////////////////////////////////////////////////////////////////////////
struct MacroTileQueue
{
    MacroTileQueue() { }
    ~MacroTileQueue() { }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Returns number of work items queued for this tile.
    uint32_t getNumQueued()
    {
        return mFifo.getNumQueued();
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Attempt to lock the work fifo. If already locked then return false.
    bool tryLock()
    {
        return mFifo.tryLock();
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Clear fifo and unlock it.
    template <typename ArenaT>
    void clear(ArenaT& arena)
    {
        mFifo.clear(arena);
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Peek at work sitting at the front of the fifo.
    BE_WORK* peek()
    {
        return mFifo.peek();
    }

    template <typename ArenaT>
    bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
    {
        return mFifo.enqueue_try_nosync(arena, entry);
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Move to next work item
    void dequeue()
    {
        mFifo.dequeue_noinc();
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Destroy fifo
    void destroy()
    {
        mFifo.destroy();
    }

    ///@todo This will all be private.
    uint32_t mWorkItemsFE = 0;
    uint32_t mWorkItemsBE = 0;
    uint32_t mId = 0;

private:
    QUEUE<BE_WORK> mFifo;
};
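
// A minimal usage sketch (illustrative only, not part of this header): a
// producer pushes backend work into a tile's queue and a single worker drains
// it. The 'arena' allocator, the BuildWork() helper, and BE_WORK's
// pfnWork/desc layout are assumptions taken from surrounding code, not
// definitions made here.
//
//     MacroTileQueue tile;
//     BE_WORK work = BuildWork();                     // hypothetical helper
//     tile.enqueue_try_nosync(arena, &work);          // copy work into the fifo
//
//     if (tile.tryLock())                             // one worker owns the fifo
//     {
//         while (BE_WORK* pWork = tile.peek())
//         {
//             pWork->pfnWork(pDC, workerId, macroID, &pWork->desc);
//             tile.dequeue();                         // advance to the next item
//         }
//         tile.clear(arena);                          // release fifo memory
//     }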

//////////////////////////////////////////////////////////////////////////
/// MacroTileMgr - Manages macrotiles for a draw.
//////////////////////////////////////////////////////////////////////////
class MacroTileMgr
{
public:
    MacroTileMgr(CachingArena& arena);
    ~MacroTileMgr()
    {
        for (auto &tile : mTiles)
        {
            tile.second.destroy();
        }
    }

    INLINE void initialize()
    {
        mWorkItemsProduced = 0;
        mWorkItemsConsumed = 0;

        mDirtyTiles.clear();
    }

    INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; }
    void markTileComplete(uint32_t id);

    INLINE bool isWorkComplete()
    {
        return mWorkItemsProduced == mWorkItemsConsumed;
    }

    void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork);

    static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
    {
        y = tileID & 0xffff;
        x = (tileID >> 16) & 0xffff;
    }
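
    // Illustrative note: the tile ID decoded above is assumed to be packed as
    // (x << 16) | y by the producer, e.g.:
    //
    //     uint32_t id = (3 << 16) | 5;                 // macrotile x = 3, y = 5
    //     uint32_t tx, ty;
    //     MacroTileMgr::getTileIndices(id, tx, ty);    // tx == 3, ty == 5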

private:
    CachingArena& mArena;
    std::unordered_map<uint32_t, MacroTileQueue> mTiles;

    // Any tile that has work queued to it is a dirty tile.
    std::vector<MacroTileQueue*> mDirtyTiles;

    OSALIGNLINE(LONG) mWorkItemsProduced { 0 };
    OSALIGNLINE(volatile LONG) mWorkItemsConsumed { 0 };
};
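
// Rough lifecycle sketch for MacroTileMgr (illustrative caller code only;
// 'arena', 'work', and the coordinates are assumed inputs): the frontend
// enqueues work per macrotile, workers drain the dirty tiles, and the draw
// retires once produced == consumed.
//
//     MacroTileMgr tileMgr(arena);
//     tileMgr.initialize();
//     tileMgr.enqueue(x, y, &work);                   // marks tile (x, y) dirty
//
//     for (MacroTileQueue* pTile : tileMgr.getDirtyTiles())
//     {
//         if (pTile->tryLock())
//         {
//             // ... drain pTile as in the MacroTileQueue sketch above ...
//             tileMgr.markTileComplete(pTile->mId);
//         }
//     }
//     bool done = tileMgr.isWorkComplete();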

typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
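
// A hypothetical callback matching PFN_DISPATCH (sketch only): DispatchQueue
// below invokes one such function per thread group, passing along a spill/fill
// buffer pointer that the callee may allocate on first use.
//
//     void MyDispatch(DRAW_CONTEXT* pDC, uint32_t workerId,
//                     uint32_t threadGroupId, void*& pSpillFillBuffer)
//     {
//         // ... execute the compute work for threadGroupId ...
//     }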

//////////////////////////////////////////////////////////////////////////
/// DispatchQueue - work queue for dispatch
//////////////////////////////////////////////////////////////////////////
class DispatchQueue
{
public:
    DispatchQueue() {}

    //////////////////////////////////////////////////////////////////////////
    /// @brief Setup the producer consumer counts.
    void initialize(uint32_t totalTasks, void* pTaskData, PFN_DISPATCH pfnDispatch)
    {
        // The available and outstanding counts start with total tasks.
        // At the start there are N tasks available and outstanding.
        // When both the available and outstanding counts have reached 0 then all work has completed.
        // When a worker starts on a threadgroup then it decrements the available count.
        // When a worker completes a threadgroup then it decrements the outstanding count.

        mTasksAvailable = totalTasks;
        mTasksOutstanding = totalTasks;

        mpTaskData = pTaskData;
        mPfnDispatch = pfnDispatch;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Returns number of tasks available for this dispatch.
    uint32_t getNumQueued()
    {
        return (mTasksAvailable > 0) ? mTasksAvailable : 0;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Atomically decrement the work available count. If the result
    ///        is not negative then we can work on the associated thread group.
    ///        Otherwise, there is no more work to do.
    bool getWork(uint32_t& groupId)
    {
        LONG result = InterlockedDecrement(&mTasksAvailable);

        if (result >= 0)
        {
            groupId = result;
            return true;
        }

        return false;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Atomically decrement the outstanding count. A worker calls this
    ///        to signal that it just finished some work. Also, return true if
    ///        we're the last worker to complete this dispatch.
    bool finishedWork()
    {
        LONG result = InterlockedDecrement(&mTasksOutstanding);
        SWR_ASSERT(result >= 0, "Should never oversubscribe work");

        return (result == 0) ? true : false;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Work is complete once both the available/outstanding counts have reached 0.
    bool isWorkComplete()
    {
        return ((mTasksAvailable <= 0) &&
                (mTasksOutstanding <= 0));
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Return pointer to task data.
    const void* GetTasksData()
    {
        return mpTaskData;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Dispatches a unit of work
    void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
    {
        SWR_ASSERT(mPfnDispatch != nullptr);
        mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer);
    }

    void* mpTaskData{ nullptr };               // The API thread sets this up and the callback task function interprets it.
    PFN_DISPATCH mPfnDispatch{ nullptr };      // Function to call per dispatch

    OSALIGNLINE(volatile LONG) mTasksAvailable{ 0 };
    OSALIGNLINE(volatile LONG) mTasksOutstanding{ 0 };
};
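
// Typical worker loop against a DispatchQueue (a sketch following the counting
// scheme described in initialize(); 'queue', 'pDC', 'workerId', and
// 'pSpillFillBuffer' are assumed to come from the caller):
//
//     uint32_t groupId;
//     while (queue.getWork(groupId))                  // claim a thread group
//     {
//         queue.dispatch(pDC, workerId, groupId, pSpillFillBuffer);
//         if (queue.finishedWork())                   // last group just retired
//         {
//             // dispatch complete; the caller can retire the draw context
//         }
//     }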


enum HOTTILE_STATE
{
    HOTTILE_INVALID,        // tile is in an uninitialized state and should be loaded with surface contents before rendering
    HOTTILE_CLEAR,          // tile should be cleared
    HOTTILE_DIRTY,          // tile has been rendered to
    HOTTILE_RESOLVED,       // tile has been stored to memory
};
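
// Assumed state flow, for orientation only: a tile starts HOTTILE_INVALID (load
// before rendering) or HOTTILE_CLEAR (pending clear), moves to HOTTILE_DIRTY
// once rendered to, and becomes HOTTILE_RESOLVED after being stored to memory.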

struct HOTTILE
{
    uint8_t *pBuffer;
    HOTTILE_STATE state;
    DWORD clearData[4];                 // May need to change based on pfnClearTile implementation.  Reorder for alignment?
    uint32_t numSamples;
    uint32_t renderTargetArrayIndex;    // current render target array index loaded
};

union HotTileSet
{
    struct
    {
        HOTTILE Color[SWR_NUM_RENDERTARGETS];
        HOTTILE Depth;
        HOTTILE Stencil;
    };
    HOTTILE Attachment[SWR_NUM_ATTACHMENTS];
};
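
// Note (assumption): the anonymous struct and the Attachment[] array are
// expected to describe the same storage, i.e. SWR_NUM_ATTACHMENTS should equal
// SWR_NUM_RENDERTARGETS + 2 (color targets plus depth and stencil). A
// compile-time check of that expectation would look like:
//
//     static_assert(SWR_NUM_ATTACHMENTS == SWR_NUM_RENDERTARGETS + 2,
//                   "HotTileSet views must overlay exactly");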

class HotTileMgr
{
public:
    HotTileMgr()
    {
        memset(mHotTiles, 0, sizeof(mHotTiles));

        // cache hottile size
        for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
        {
            mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
        }
        mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
        mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
    }
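
    // Worked example of the size computation above, with assumed knob values
    // (not necessarily the driver's actual settings): for a 64x64 macrotile
    // and a 32bpp color hot tile format,
    //     64 * 64 * 32 / 8 = 16384 bytes (16 KB) per color hot tile.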

    ~HotTileMgr()
    {
        for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x)
        {
            for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y)
            {
                for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a)
                {
                    FreeHotTileMem(mHotTiles[x][y].Attachment[a].pBuffer);
                }
            }
        }
    }

    void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);

    HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
        uint32_t renderTargetArrayIndex = 0);

    HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);

    static void ClearColorHotTile(const HOTTILE* pHotTile);
    static void ClearDepthHotTile(const HOTTILE* pHotTile);
    static void ClearStencilHotTile(const HOTTILE* pHotTile);

private:
    HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
    uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];

    void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode)
    {
        void* p = nullptr;
#if defined(_WIN32)
        HANDLE hProcess = GetCurrentProcess();
        p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
#else
        p = AlignedMalloc(size, align);
#endif

        return p;
    }

    void FreeHotTileMem(void* pBuffer)
    {
        if (pBuffer)
        {
#if defined(_WIN32)
            VirtualFree(pBuffer, 0, MEM_RELEASE);
#else
            AlignedFree(pBuffer);
#endif
        }
    }
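
    // Pairing sketch (illustrative; the alignment value is an assumption):
    // buffers from AllocHotTileMem must be released with FreeHotTileMem above,
    // since the Windows path commits pages via VirtualAllocExNuma (freed with
    // VirtualFree) while other platforms use the aligned-malloc helpers.
    //
    //     void* p = AllocHotTileMem(mHotTileSize[attachment], 64, numaNode);
    //     // ... use p as hot tile backing storage ...
    //     FreeHotTileMem(p);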
};